gcc/tree-vect-loop-manip.cc
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "internal-fn.h"
47 #include "stor-layout.h"
48 #include "optabs-query.h"
49 #include "vec-perm-indices.h"
50 #include "insn-config.h"
51 #include "rtl.h"
52 #include "recog.h"
53 #include "langhooks.h"
54 #include "tree-vector-builder.h"
55 #include "optabs-tree.h"
57 /*************************************************************************
58 Simple Loop Peeling Utilities
60 Utilities to support loop peeling for vectorization purposes.
61 *************************************************************************/
64 /* Renames the use *OP_P. */
66 static void
67 rename_use_op (use_operand_p op_p)
69 tree new_name;
71 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
72 return;
74 new_name = get_current_def (USE_FROM_PTR (op_p));
76 /* Something defined outside of the loop. */
77 if (!new_name)
78 return;
80 /* An ordinary ssa name defined in the loop. */
82 SET_USE (op_p, new_name);
86 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
87 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
88 true. */
90 static void
91 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
93 gimple *stmt;
94 use_operand_p use_p;
95 ssa_op_iter iter;
96 edge e;
97 edge_iterator ei;
98 class loop *loop = bb->loop_father;
99 class loop *outer_loop = NULL;
101 if (rename_from_outer_loop)
103 gcc_assert (loop);
104 outer_loop = loop_outer (loop);
107 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
108 gsi_next (&gsi))
110 stmt = gsi_stmt (gsi);
111 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
112 rename_use_op (use_p);
115 FOR_EACH_EDGE (e, ei, bb->preds)
117 if (!flow_bb_inside_loop_p (loop, e->src))
119 if (!rename_from_outer_loop)
120 continue;
121 if (e->src != outer_loop->header)
123 if (outer_loop->inner->next)
125 /* If outer_loop has 2 inner loops, allow there to
126 be an extra basic block which decides which of the
127 two loops to use using LOOP_VECTORIZED. */
128 if (!single_pred_p (e->src)
129 || single_pred (e->src) != outer_loop->header)
130 continue;
134 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
135 gsi_next (&gsi))
136 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
141 struct adjust_info
143 tree from, to;
144 basic_block bb;
147 /* A stack of values to be adjusted in debug stmts. We have to
148 process them LIFO, so that the closest substitution applies. If we
149 processed them FIFO, without the stack, we might substitute uses
150 with a PHI DEF that would soon become non-dominant, and when we got
151 to the suitable one, it wouldn't have anything to substitute any
152 more. */
153 static vec<adjust_info, va_heap> adjust_vec;
155 /* Adjust any debug stmts that referenced AI->from values to use the
156 loop-closed AI->to, if the references are dominated by AI->bb and
157 not by the definition of AI->from. */
159 static void
160 adjust_debug_stmts_now (adjust_info *ai)
162 basic_block bbphi = ai->bb;
163 tree orig_def = ai->from;
164 tree new_def = ai->to;
165 imm_use_iterator imm_iter;
166 gimple *stmt;
167 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
169 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
171 /* Adjust any debug stmts that held onto non-loop-closed
172 references. */
173 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
175 use_operand_p use_p;
176 basic_block bbuse;
178 if (!is_gimple_debug (stmt))
179 continue;
181 gcc_assert (gimple_debug_bind_p (stmt));
183 bbuse = gimple_bb (stmt);
185 if ((bbuse == bbphi
186 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
187 && !(bbuse == bbdef
188 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
190 if (new_def)
191 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
192 SET_USE (use_p, new_def);
193 else
195 gimple_debug_bind_reset_value (stmt);
196 update_stmt (stmt);
202 /* Adjust debug stmts as scheduled before. */
204 static void
205 adjust_vec_debug_stmts (void)
207 if (!MAY_HAVE_DEBUG_BIND_STMTS)
208 return;
210 gcc_assert (adjust_vec.exists ());
212 while (!adjust_vec.is_empty ())
214 adjust_debug_stmts_now (&adjust_vec.last ());
215 adjust_vec.pop ();
219 /* Adjust any debug stmts that referenced FROM values to use the
220 loop-closed TO, if the references are dominated by BB and not by
221 the definition of FROM. If adjust_vec is non-NULL, adjustments
222 will be postponed until adjust_vec_debug_stmts is called. */
224 static void
225 adjust_debug_stmts (tree from, tree to, basic_block bb)
227 adjust_info ai;
229 if (MAY_HAVE_DEBUG_BIND_STMTS
230 && TREE_CODE (from) == SSA_NAME
231 && ! SSA_NAME_IS_DEFAULT_DEF (from)
232 && ! virtual_operand_p (from))
234 ai.from = from;
235 ai.to = to;
236 ai.bb = bb;
238 if (adjust_vec.exists ())
239 adjust_vec.safe_push (ai);
240 else
241 adjust_debug_stmts_now (&ai);
245 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
246 to adjust any debug stmts that referenced the old phi arg,
247 presumably non-loop-closed references left over from other
248 transformations. */
250 static void
251 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
253 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
255 gcc_assert (TREE_CODE (orig_def) != SSA_NAME
256 || orig_def != new_def);
258 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
260 if (MAY_HAVE_DEBUG_BIND_STMTS)
261 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
262 gimple_bb (update_phi));
265 /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
266 that the control should have during the first iteration and NEXT_CTRL is the
267 value that it should have on subsequent iterations. */
269 static void
270 vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
271 tree next_ctrl)
273 gphi *phi = create_phi_node (ctrl, loop->header);
274 add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
275 add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
278 /* Add SEQ to the end of LOOP's preheader block. */
280 static void
281 add_preheader_seq (class loop *loop, gimple_seq seq)
283 if (seq)
285 edge pe = loop_preheader_edge (loop);
286 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
287 gcc_assert (!new_bb);
291 /* Add SEQ to the beginning of LOOP's header block. */
293 static void
294 add_header_seq (class loop *loop, gimple_seq seq)
296 if (seq)
298 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
299 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
303 /* Return true if the target can interleave elements of two vectors.
304 OFFSET is 0 if the first half of the vectors should be interleaved
305 or 1 if the second half should. When returning true, store the
306 associated permutation in INDICES. */
308 static bool
309 interleave_supported_p (vec_perm_indices *indices, tree vectype,
310 unsigned int offset)
312 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
313 poly_uint64 base = exact_div (nelts, 2) * offset;
314 vec_perm_builder sel (nelts, 2, 3);
315 for (unsigned int i = 0; i < 3; ++i)
317 sel.quick_push (base + i);
318 sel.quick_push (base + i + nelts);
320 indices->new_vector (sel, 2, nelts);
321 return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
322 *indices);
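/* Illustrative sketch (standalone C++, not part of this file): the shape of
   the interleave selector built above.  For a fixed NELTS and OFFSET, the
   permutation picks alternating elements from the chosen half of the two
   input vectors; e.g. NELTS == 8, OFFSET == 1 gives { 4, 12, 5, 13, 6, 14,
   7, 15 }.  The helper name below is hypothetical.  */
#if 0
#include <cstdio>
#include <vector>

static std::vector<unsigned>
interleave_indices (unsigned nelts, unsigned offset)
{
  unsigned base = nelts / 2 * offset;
  std::vector<unsigned> sel;
  for (unsigned i = 0; i < nelts / 2; ++i)
    {
      sel.push_back (base + i);           /* lane from the first input vector */
      sel.push_back (base + i + nelts);   /* matching lane from the second */
    }
  return sel;
}

int
main ()
{
  for (unsigned idx : interleave_indices (8, 1))
    std::printf ("%u ", idx);   /* prints: 4 12 5 13 6 14 7 15 */
  std::printf ("\n");
}
#endif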
325 /* Try to use permutes to define the masks in DEST_RGM using the masks
326 in SRC_RGM, given that the former has twice as many masks as the
327 latter. Return true on success, adding any new statements to SEQ. */
329 static bool
330 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
331 rgroup_controls *src_rgm)
333 tree src_masktype = src_rgm->type;
334 tree dest_masktype = dest_rgm->type;
335 machine_mode src_mode = TYPE_MODE (src_masktype);
336 insn_code icode1, icode2;
337 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
338 && (icode1 = optab_handler (vec_unpacku_hi_optab,
339 src_mode)) != CODE_FOR_nothing
340 && (icode2 = optab_handler (vec_unpacku_lo_optab,
341 src_mode)) != CODE_FOR_nothing)
343 /* Unpacking the source masks gives at least as many mask bits as
344 we need. We can then VIEW_CONVERT any excess bits away. */
345 machine_mode dest_mode = insn_data[icode1].operand[0].mode;
346 gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
347 tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
348 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
350 tree src = src_rgm->controls[i / 2];
351 tree dest = dest_rgm->controls[i];
352 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
353 ? VEC_UNPACK_HI_EXPR
354 : VEC_UNPACK_LO_EXPR);
355 gassign *stmt;
356 if (dest_masktype == unpack_masktype)
357 stmt = gimple_build_assign (dest, code, src);
358 else
360 tree temp = make_ssa_name (unpack_masktype);
361 stmt = gimple_build_assign (temp, code, src);
362 gimple_seq_add_stmt (seq, stmt);
363 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
364 build1 (VIEW_CONVERT_EXPR,
365 dest_masktype, temp));
367 gimple_seq_add_stmt (seq, stmt);
369 return true;
371 vec_perm_indices indices[2];
372 if (dest_masktype == src_masktype
373 && interleave_supported_p (&indices[0], src_masktype, 0)
374 && interleave_supported_p (&indices[1], src_masktype, 1))
376 /* The destination requires twice as many mask bits as the source, so
377 we can use interleaving permutes to double up the number of bits. */
378 tree masks[2];
379 for (unsigned int i = 0; i < 2; ++i)
380 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
381 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
383 tree src = src_rgm->controls[i / 2];
384 tree dest = dest_rgm->controls[i];
385 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
386 src, src, masks[i & 1]);
387 gimple_seq_add_stmt (seq, stmt);
389 return true;
391 return false;
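/* Illustrative sketch (standalone C++, not part of this file): scalar model
   of the VEC_PERM fallback above.  Interleaving a mask with itself makes
   every source lane drive two adjacent destination lanes, so the two
   destination masks (offsets 0 and 1) together cover twice as many scalars
   as the source mask.  The helper name below is hypothetical.  */
#if 0
#include <vector>

static std::vector<bool>
interleave_mask_with_self (const std::vector<bool> &src, unsigned offset)
{
  unsigned n = src.size ();
  unsigned base = n / 2 * offset;
  std::vector<bool> dest (n);
  for (unsigned j = 0; j < n; ++j)
    /* Both VEC_PERM operands are SRC, so selector elements BASE + J/2 and
       BASE + J/2 + N read the same lane.  */
    dest[j] = src[base + j / 2];
  return dest;
}
/* e.g. src = { 1, 1, 1, 0, 0, 0, 0, 0 } gives
   offset 0: { 1, 1, 1, 1, 1, 1, 0, 0 }   offset 1: { 0, 0, 0, 0, 0, 0, 0, 0 }.  */
#endif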
394 /* Populate DEST_RGM->controls, given that they should add up to STEP.
396 STEP = MIN_EXPR <ivtmp_34, VF>;
398 First length (MIN (X, VF/N)):
399 loop_len_15 = MIN_EXPR <STEP, VF/N>;
401 Second length:
402 tmp = STEP - loop_len_15;
403 loop_len_16 = MIN (tmp, VF/N);
405 Third length:
406 tmp2 = tmp - loop_len_16;
407 loop_len_17 = MIN (tmp2, VF/N);
409 Last length:
410 loop_len_18 = tmp2 - loop_len_17;
413 static void
414 vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
415 rgroup_controls *dest_rgm, tree step)
417 tree ctrl_type = dest_rgm->type;
418 poly_uint64 nitems_per_ctrl
419 = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
420 tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
422 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
424 tree ctrl = dest_rgm->controls[i];
425 if (i == 0)
427 /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */
428 gassign *assign
429 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
430 gimple_seq_add_stmt (seq, assign);
432 else if (i == dest_rgm->controls.length () - 1)
434 /* Last iteration: Remain capped to the range [0, VF/N]. */
435 gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
436 dest_rgm->controls[i - 1]);
437 gimple_seq_add_stmt (seq, assign);
439 else
441 /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */
442 step = gimple_build (seq, MINUS_EXPR, iv_type, step,
443 dest_rgm->controls[i - 1]);
444 gassign *assign
445 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
446 gimple_seq_add_stmt (seq, assign);
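/* Illustrative sketch (standalone C++, not part of this file): scalar
   semantics of the length splitting above.  STEP items remain for the
   current iteration and each of the N controls may cover at most LIMIT
   (VF/N) items; every control but the last takes MIN (remaining, LIMIT)
   and the last takes whatever is left.  The helper name below is
   hypothetical.  */
#if 0
#include <algorithm>
#include <vector>

static std::vector<unsigned>
split_step_into_lens (unsigned step, unsigned limit, unsigned ncontrols)
{
  std::vector<unsigned> lens (ncontrols);
  unsigned remaining = step;
  for (unsigned i = 0; i + 1 < ncontrols; ++i)
    {
      lens[i] = std::min (remaining, limit);  /* loop_len_i = MIN (remaining, VF/N) */
      remaining -= lens[i];
    }
  lens[ncontrols - 1] = remaining;            /* last length: the leftover items */
  return lens;
}
/* e.g. split_step_into_lens (10, 4, 3) yields { 4, 4, 2 }.  */
#endif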
451 /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
452 for all the rgroup controls in RGC and return a control that is nonzero
453 when the loop needs to iterate. Add any new preheader statements to
454 PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
456 RGC belongs to loop LOOP. The loop originally iterated NITERS
457 times and has been vectorized according to LOOP_VINFO.
459 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
460 starts with NITERS_SKIP dummy iterations of the scalar loop before
461 the real work starts. The mask elements for these dummy iterations
462 must be 0, to ensure that the extra iterations do not have an effect.
464 It is known that:
466 NITERS * RGC->max_nscalars_per_iter * RGC->factor
468 does not overflow. However, MIGHT_WRAP_P says whether an induction
469 variable that starts at 0 and has step:
471 VF * RGC->max_nscalars_per_iter * RGC->factor
473 might overflow before hitting a value above:
475 (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
477 This means that we cannot guarantee that such an induction variable
478 would ever hit a value that produces a set of all-false masks or zero
479 lengths for RGC.
481 Note: the cost of the code generated by this function is modeled
482 by vect_estimate_min_profitable_iters, so changes here may need
483 corresponding changes there. */
485 static tree
486 vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
487 gimple_seq *preheader_seq,
488 gimple_seq *header_seq,
489 gimple_stmt_iterator loop_cond_gsi,
490 rgroup_controls *rgc, tree niters,
491 tree niters_skip, bool might_wrap_p,
492 tree *iv_step, tree *compare_step)
494 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
495 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
496 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
498 tree ctrl_type = rgc->type;
499 unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
500 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
501 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
502 tree length_limit = NULL_TREE;
503 /* For lengths, we need LENGTH_LIMIT to ensure each length stays in range. */
504 if (!use_masks_p)
505 length_limit = build_int_cst (compare_type, nitems_per_ctrl);
507 /* Calculate the maximum number of item values that the rgroup
508 handles in total, the number that it handles for each iteration
509 of the vector loop, and the number that it should skip during the
510 first iteration of the vector loop. */
511 tree nitems_total = niters;
512 tree nitems_step = build_int_cst (iv_type, vf);
513 tree nitems_skip = niters_skip;
514 if (nitems_per_iter != 1)
516 /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
517 these multiplications don't overflow. */
518 tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
519 tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
520 nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
521 nitems_total, compare_factor);
522 nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
523 nitems_step, iv_factor);
524 if (nitems_skip)
525 nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
526 nitems_skip, compare_factor);
529 /* Create an induction variable that counts the number of items
530 processed. */
531 tree index_before_incr, index_after_incr;
532 gimple_stmt_iterator incr_gsi;
533 bool insert_after;
534 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
535 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
537 /* Create an IV that counts down from niters_total and whose step
538 is the (variable) amount processed in the current iteration:
540 _10 = (unsigned long) count_12(D);
542 # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
543 _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
545 vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
547 ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
549 if (ivtmp_9 > POLY_INT_CST [4, 4])
550 goto <bb 4>; [83.33%]
551 else
552 goto <bb 5>; [16.67%]
554 nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
555 tree step = rgc->controls.length () == 1 ? rgc->controls[0]
556 : make_ssa_name (iv_type);
557 /* Create decrement IV. */
558 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
560 create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
561 insert_after, &index_before_incr, &index_after_incr);
562 tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
563 index_before_incr, nitems_step);
564 gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
566 else
568 create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
569 &incr_gsi, insert_after, &index_before_incr,
570 &index_after_incr);
571 gimple_seq_add_stmt (header_seq,
572 gimple_build_assign (step, MIN_EXPR,
573 index_before_incr,
574 nitems_step));
576 *iv_step = step;
577 *compare_step = nitems_step;
578 return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
579 : index_before_incr;
582 /* Create increment IV. */
583 create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
584 loop, &incr_gsi, insert_after, &index_before_incr,
585 &index_after_incr);
587 tree zero_index = build_int_cst (compare_type, 0);
588 tree test_index, test_limit, first_limit;
589 gimple_stmt_iterator *test_gsi;
590 if (might_wrap_p)
592 /* In principle the loop should stop iterating once the incremented
593 IV reaches a value greater than or equal to:
595 NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
597 However, there's no guarantee that this addition doesn't overflow
598 the comparison type, or that the IV hits a value above it before
599 wrapping around. We therefore adjust the limit down by one
600 IV step:
602 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
603 -[infinite-prec] NITEMS_STEP
605 and compare the IV against this limit _before_ incrementing it.
606 Since the comparison type is unsigned, we actually want the
607 subtraction to saturate at zero:
609 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
610 -[sat] NITEMS_STEP
612 And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
614 NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
616 where the rightmost subtraction can be done directly in
617 COMPARE_TYPE. */
618 test_index = index_before_incr;
619 tree adjust = gimple_convert (preheader_seq, compare_type,
620 nitems_step);
621 if (nitems_skip)
622 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
623 adjust, nitems_skip);
624 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
625 nitems_total, adjust);
626 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
627 test_limit, adjust);
628 test_gsi = &incr_gsi;
630 /* Get a safe limit for the first iteration. */
631 if (nitems_skip)
633 /* The first vector iteration can handle at most NITEMS_STEP
634 items. NITEMS_STEP <= CONST_LIMIT, and adding
635 NITEMS_SKIP to that cannot overflow. */
636 tree const_limit = build_int_cst (compare_type,
637 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
638 * nitems_per_iter);
639 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
640 nitems_total, const_limit);
641 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
642 first_limit, nitems_skip);
644 else
645 /* For the first iteration it doesn't matter whether the IV hits
646 a value above NITEMS_TOTAL. That only matters for the latch
647 condition. */
648 first_limit = nitems_total;
650 else
652 /* Test the incremented IV, which will always hit a value above
653 the bound before wrapping. */
654 test_index = index_after_incr;
655 test_limit = nitems_total;
656 if (nitems_skip)
657 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
658 test_limit, nitems_skip);
659 test_gsi = &loop_cond_gsi;
661 first_limit = test_limit;
664 /* Convert the IV value to the comparison type (either a no-op or
665 a demotion). */
666 gimple_seq test_seq = NULL;
667 test_index = gimple_convert (&test_seq, compare_type, test_index);
668 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
670 /* Provide a definition of each control in the group. */
671 tree next_ctrl = NULL_TREE;
672 tree ctrl;
673 unsigned int i;
674 FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
676 /* Previous controls will cover BIAS items. This control covers the
677 next batch. */
678 poly_uint64 bias = nitems_per_ctrl * i;
679 tree bias_tree = build_int_cst (compare_type, bias);
681 /* See whether the first iteration of the vector loop is known
682 to have a full control. */
683 poly_uint64 const_limit;
684 bool first_iteration_full
685 = (poly_int_tree_p (first_limit, &const_limit)
686 && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
688 /* Rather than have a new IV that starts at BIAS and goes up to
689 TEST_LIMIT, prefer to use the same 0-based IV for each control
690 and adjust the bound down by BIAS. */
691 tree this_test_limit = test_limit;
692 if (i != 0)
694 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
695 compare_type, this_test_limit,
696 bias_tree);
697 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
698 compare_type, this_test_limit,
699 bias_tree);
702 /* Create the initial control. First include all items that
703 are within the loop limit. */
704 tree init_ctrl = NULL_TREE;
705 if (!first_iteration_full)
707 tree start, end;
708 if (first_limit == test_limit)
710 /* Use a natural test between zero (the initial IV value)
711 and the loop limit. The "else" block would be valid too,
712 but this choice can avoid the need to load BIAS_TREE into
713 a register. */
714 start = zero_index;
715 end = this_test_limit;
717 else
719 /* FIRST_LIMIT is the maximum number of items handled by the
720 first iteration of the vector loop. Test the portion
721 associated with this control. */
722 start = bias_tree;
723 end = first_limit;
726 if (use_masks_p)
727 init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
728 start, end, "max_mask");
729 else
731 init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
732 gimple_seq seq = vect_gen_len (init_ctrl, start,
733 end, length_limit);
734 gimple_seq_add_seq (preheader_seq, seq);
738 /* Now AND out the bits that are within the number of skipped
739 items. */
740 poly_uint64 const_skip;
741 if (nitems_skip
742 && !(poly_int_tree_p (nitems_skip, &const_skip)
743 && known_le (const_skip, bias)))
745 gcc_assert (use_masks_p);
746 tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
747 bias_tree, nitems_skip);
748 if (init_ctrl)
749 init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
750 init_ctrl, unskipped_mask);
751 else
752 init_ctrl = unskipped_mask;
755 if (!init_ctrl)
757 /* First iteration is full. */
758 if (use_masks_p)
759 init_ctrl = build_minus_one_cst (ctrl_type);
760 else
761 init_ctrl = length_limit;
764 /* Get the control value for the next iteration of the loop. */
765 if (use_masks_p)
767 gimple_seq stmts = NULL;
768 next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
769 this_test_limit, "next_mask");
770 gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
772 else
774 next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
775 gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
776 length_limit);
777 gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
780 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
783 int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
784 if (partial_load_bias != 0)
786 tree adjusted_len = rgc->bias_adjusted_ctrl;
787 gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
788 rgc->controls[0],
789 build_int_cst
790 (TREE_TYPE (rgc->controls[0]),
791 partial_load_bias));
792 gimple_seq_add_stmt (header_seq, minus);
795 return next_ctrl;
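/* Illustrative sketch (standalone C++, not part of this file): the
   MAX_EXPR/MINUS_EXPR pair used for MIGHT_WRAP_P above is an unsigned
   saturating subtraction, i.e. the "NITEMS_TOTAL -[sat] (NITEMS_STEP -
   NITEMS_SKIP)" limit from the comment, computed without risk of wrapping.
   The helper name below is hypothetical.  */
#if 0
#include <algorithm>
#include <cassert>

static unsigned
saturating_sub (unsigned total, unsigned adjust)
{
  /* If TOTAL < ADJUST, the MAX clamps the minuend to ADJUST, so the
     subtraction yields 0 instead of a huge wrapped value.  */
  return std::max (total, adjust) - adjust;
}

int
main ()
{
  assert (saturating_sub (100, 16) == 84);  /* normal case */
  assert (saturating_sub (7, 16) == 0);     /* would wrap; saturates to 0 */
}
#endif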
798 /* Set up the iteration condition and rgroup controls for LOOP, given
799 that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
800 loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
801 the number of iterations of the original scalar loop that should be
802 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
803 for vect_set_loop_condition.
805 Insert the branch-back condition before LOOP_COND_GSI and return the
806 final gcond. */
808 static gcond *
809 vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
810 loop_vec_info loop_vinfo, tree niters,
811 tree final_iv, bool niters_maybe_zero,
812 gimple_stmt_iterator loop_cond_gsi)
814 gimple_seq preheader_seq = NULL;
815 gimple_seq header_seq = NULL;
817 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
818 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
819 unsigned int compare_precision = TYPE_PRECISION (compare_type);
820 tree orig_niters = niters;
822 /* Type of the initial value of NITERS. */
823 tree ni_actual_type = TREE_TYPE (niters);
824 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
825 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
826 if (niters_skip)
827 niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
829 /* Convert NITERS to the same size as the compare. */
830 if (compare_precision > ni_actual_precision
831 && niters_maybe_zero)
833 /* We know that there is always at least one iteration, so if the
834 count is zero then it must have wrapped. Cope with this by
835 subtracting 1 before the conversion and adding 1 to the result. */
836 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
837 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
838 niters, build_minus_one_cst (ni_actual_type));
839 niters = gimple_convert (&preheader_seq, compare_type, niters);
840 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
841 niters, build_one_cst (compare_type));
843 else
844 niters = gimple_convert (&preheader_seq, compare_type, niters);
846 /* Iterate over all the rgroups and fill in their controls. We could use
847 the first control from any rgroup for the loop condition; here we
848 arbitrarily pick the last. */
849 tree test_ctrl = NULL_TREE;
850 tree iv_step = NULL_TREE;
851 tree compare_step = NULL_TREE;
852 rgroup_controls *rgc;
853 rgroup_controls *iv_rgc = nullptr;
854 unsigned int i;
855 auto_vec<rgroup_controls> *controls = use_masks_p
856 ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
857 : &LOOP_VINFO_LENS (loop_vinfo);
858 FOR_EACH_VEC_ELT (*controls, i, rgc)
859 if (!rgc->controls.is_empty ())
861 /* First try using permutes. This adds a single vector
862 instruction to the loop for each mask, but needs no extra
863 loop invariants or IVs. */
864 unsigned int nmasks = i + 1;
865 if (use_masks_p && (nmasks & 1) == 0)
867 rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
868 if (!half_rgc->controls.is_empty ()
869 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
870 continue;
873 if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
874 || !iv_rgc
875 || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
876 != rgc->max_nscalars_per_iter * rgc->factor))
878 /* See whether zero-based IV would ever generate all-false masks
879 or zero length before wrapping around. */
880 bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
882 /* Set up all controls for this group. */
883 test_ctrl
884 = vect_set_loop_controls_directly (loop, loop_vinfo,
885 &preheader_seq, &header_seq,
886 loop_cond_gsi, rgc, niters,
887 niters_skip, might_wrap_p,
888 &iv_step, &compare_step);
890 iv_rgc = rgc;
893 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
894 && rgc->controls.length () > 1)
896 /* vect_set_loop_controls_directly creates an IV whose step
897 is equal to the expected sum of RGC->controls. Use that
898 information to populate RGC->controls. */
899 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
900 gcc_assert (iv_step);
901 vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
905 /* Emit all accumulated statements. */
906 add_preheader_seq (loop, preheader_seq);
907 add_header_seq (loop, header_seq);
909 /* Get a boolean result that tells us whether to iterate. */
910 gcond *cond_stmt;
911 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
912 && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
914 gcc_assert (compare_step);
915 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
916 cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
917 NULL_TREE);
919 else
921 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
922 tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
923 cond_stmt
924 = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
926 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
928 /* The loop iterates (NITERS - 1) / VF + 1 times.
929 Subtract one from this to get the latch count. */
930 tree step = build_int_cst (compare_type,
931 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
932 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
933 build_minus_one_cst (compare_type));
934 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
935 niters_minus_one, step);
937 if (final_iv)
939 gassign *assign = gimple_build_assign (final_iv, orig_niters);
940 gsi_insert_on_edge_immediate (exit_edge, assign);
943 return cond_stmt;
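/* Illustrative sketch (standalone C++, not part of this file): the widening
   trick used above when NITERS_MAYBE_ZERO.  A zero count in the narrow
   unsigned type really means 2^precision iterations; subtracting one before
   the conversion and adding one after it recovers that value in the wider
   compare type.  The helper name below is hypothetical.  */
#if 0
#include <cassert>
#include <cstdint>

static uint64_t
widen_niters (uint32_t niters)
{
  /* (NITERS - 1) wraps to 0xffffffff when NITERS == 0; widening and
     adding 1 then gives 0x100000000, the true iteration count.  */
  return (uint64_t) (uint32_t) (niters - 1) + 1;
}

int
main ()
{
  assert (widen_niters (5) == 5);
  assert (widen_niters (0) == (uint64_t) 1 << 32);
}
#endif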
946 /* Set up the iteration condition and rgroup controls for LOOP in AVX512
947 style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
948 vectorized loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
949 the number of iterations of the original scalar loop that should be
950 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
951 for vect_set_loop_condition.
953 Insert the branch-back condition before LOOP_COND_GSI and return the
954 final gcond. */
956 static gcond *
957 vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
958 edge exit_edge,
959 loop_vec_info loop_vinfo, tree niters,
960 tree final_iv,
961 bool niters_maybe_zero,
962 gimple_stmt_iterator loop_cond_gsi)
964 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
965 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
966 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
967 tree orig_niters = niters;
968 gimple_seq preheader_seq = NULL;
970 /* Create an IV that counts down from niters and whose step
971 is the number of iterations processed in the current iteration.
972 Produce the controls with compares like the following.
974 # iv_2 = PHI <niters, iv_3>
975 rem_4 = MIN <iv_2, VF>;
976 remv_6 = { rem_4, rem_4, rem_4, ... }
977 mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv_6;
978 iv_3 = iv_2 - VF;
979 if (iv_2 > VF)
980 continue;
982 Where the constant is built with elements at most VF - 1 and
983 repetitions according to max_nscalars_per_iter which is guaranteed
984 to be the same within a group. */
986 /* Convert NITERS to the determined IV type. */
987 if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
988 && niters_maybe_zero)
990 /* We know that there is always at least one iteration, so if the
991 count is zero then it must have wrapped. Cope with this by
992 subtracting 1 before the conversion and adding 1 to the result. */
993 gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
994 niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
995 niters, build_minus_one_cst (TREE_TYPE (niters)));
996 niters = gimple_convert (&preheader_seq, iv_type, niters);
997 niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
998 niters, build_one_cst (iv_type));
1000 else
1001 niters = gimple_convert (&preheader_seq, iv_type, niters);
1003 /* Bias the initial value of the IV in case we need to skip iterations
1004 at the beginning. */
1005 tree niters_adj = niters;
1006 if (niters_skip)
1008 tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
1009 niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
1010 iv_type, niters, skip);
1013 /* The iteration step is the vectorization factor. */
1014 tree iv_step = build_int_cst (iv_type, vf);
1016 /* Create the decrement IV. */
1017 tree index_before_incr, index_after_incr;
1018 gimple_stmt_iterator incr_gsi;
1019 bool insert_after;
1020 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1021 create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
1022 &incr_gsi, insert_after, &index_before_incr,
1023 &index_after_incr);
1025 /* Iterate over all the rgroups and fill in their controls. */
1026 for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
1028 if (rgc.controls.is_empty ())
1029 continue;
1031 tree ctrl_type = rgc.type;
1032 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
1034 tree vectype = rgc.compare_type;
1036 /* index_after_incr is the IV specifying the remaining iterations in
1037 the next iteration. */
1038 tree rem = index_after_incr;
1039 /* When the data type for the compare to produce the mask is
1040 smaller than the IV type we need to saturate. Saturate to
1041 the smallest possible value (IV_TYPE) so we only have to
1042 saturate once (CSE will catch redundant ones we add). */
1043 if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
1044 rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1045 UNKNOWN_LOCATION,
1046 MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
1047 rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
1048 UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
1050 /* Build a data vector composed of the remaining iterations. */
1051 rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
1052 UNKNOWN_LOCATION, vectype, rem);
1054 /* Provide a definition of each vector in the control group. */
1055 tree next_ctrl = NULL_TREE;
1056 tree first_rem = NULL_TREE;
1057 tree ctrl;
1058 unsigned int i;
1059 FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
1061 /* Previous controls will cover BIAS items. This control covers the
1062 next batch. */
1063 poly_uint64 bias = nitems_per_ctrl * i;
1065 /* Build the constant to compare the remaining iters against;
1066 this is something like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
1067 split into pieces. */
1068 unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
1069 tree_vector_builder builder (vectype, n, 1);
1070 for (unsigned i = 0; i < n; ++i)
1072 unsigned HOST_WIDE_INT val
1073 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
1074 gcc_assert (val < vf.to_constant ());
1075 builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
1077 tree cmp_series = builder.build ();
1079 /* Create the initial control. First include all items that
1080 are within the loop limit. */
1081 tree init_ctrl = NULL_TREE;
1082 poly_uint64 const_limit;
1083 /* See whether the first iteration of the vector loop is known
1084 to have a full control. */
1085 if (poly_int_tree_p (niters, &const_limit)
1086 && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
1087 init_ctrl = build_minus_one_cst (ctrl_type);
1088 else
1090 /* The remaining work items initially are niters. Saturate,
1091 splat and compare. */
1092 if (!first_rem)
1094 first_rem = niters;
1095 if (TYPE_PRECISION (TREE_TYPE (vectype))
1096 < TYPE_PRECISION (iv_type))
1097 first_rem = gimple_build (&preheader_seq,
1098 MIN_EXPR, TREE_TYPE (first_rem),
1099 first_rem, iv_step);
1100 first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
1101 first_rem);
1102 first_rem = gimple_build_vector_from_val (&preheader_seq,
1103 vectype, first_rem);
1105 init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
1106 cmp_series, first_rem);
1109 /* Now AND out the bits that are within the number of skipped
1110 items. */
1111 poly_uint64 const_skip;
1112 if (niters_skip
1113 && !(poly_int_tree_p (niters_skip, &const_skip)
1114 && known_le (const_skip, bias)))
1116 /* For integer mode masks it's cheaper to shift out the bits
1117 since that avoids loading a constant. */
1118 gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
1119 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1120 lang_hooks.types.type_for_mode
1121 (TYPE_MODE (ctrl_type), 1),
1122 init_ctrl);
1123 /* ??? But when the shift amount isn't constant this requires
1124 a round-trip to GPRs. We could apply the bias to either
1125 side of the compare instead. */
1126 tree shift = gimple_build (&preheader_seq, MULT_EXPR,
1127 TREE_TYPE (niters_skip), niters_skip,
1128 build_int_cst (TREE_TYPE (niters_skip),
1129 rgc.max_nscalars_per_iter));
1130 init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
1131 TREE_TYPE (init_ctrl),
1132 init_ctrl, shift);
1133 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1134 ctrl_type, init_ctrl);
1137 /* Get the control value for the next iteration of the loop. */
1138 next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1139 UNKNOWN_LOCATION,
1140 LT_EXPR, ctrl_type, cmp_series, rem);
1142 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
1146 /* Emit all accumulated statements. */
1147 add_preheader_seq (loop, preheader_seq);
1149 /* Adjust the exit test using the decrementing IV. */
1150 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
1151 /* When we peel for alignment with niter_skip != 0 this can
1152 cause niter + niter_skip to wrap, and since we are comparing the
1153 value before the decrement here we get a false early exit.
1154 We can't compare the value after the decrement either, because that
1155 decrement could wrap as well since we're not doing a saturating
1156 decrement. To avoid this situation we force a larger
1157 iv_type. */
1158 gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
1159 NULL_TREE, NULL_TREE);
1160 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1162 /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
1163 Subtract one from this to get the latch count. */
1164 tree niters_minus_one
1165 = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
1166 build_minus_one_cst (TREE_TYPE (orig_niters)));
1167 tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
1168 if (niters_skip)
1169 niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
1170 fold_convert (iv_type, niters_skip));
1171 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
1172 niters_adj2, iv_step);
1174 if (final_iv)
1176 gassign *assign = gimple_build_assign (final_iv, orig_niters);
1177 gsi_insert_on_edge_immediate (single_exit (loop), assign);
1180 return cond_stmt;
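/* Illustrative sketch (standalone C++, not part of this file): scalar model
   of the AVX512-style mask generation above.  The remaining scalar count is
   clamped to VF, splat across the vector and compared against the series
   { 0, 0, 1, 1, 2, 2, ... } (here two scalars per iteration), so a partial
   final iteration enables only the leading lanes.  The helper name below is
   hypothetical.  */
#if 0
#include <algorithm>
#include <vector>

static std::vector<bool>
avx512_style_mask (unsigned remaining, unsigned vf, unsigned nscalars_per_iter)
{
  unsigned rem = std::min (remaining, vf);      /* rem_4 = MIN <iv_2, VF> */
  std::vector<bool> mask (vf * nscalars_per_iter);
  for (unsigned i = 0; i < mask.size (); ++i)
    mask[i] = i / nscalars_per_iter < rem;      /* mask_5 = series < remv_6 */
  return mask;
}
/* e.g. avx512_style_mask (3, 4, 2) yields { 1, 1, 1, 1, 1, 1, 0, 0 }.  */
#endif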
1184 /* Like vect_set_loop_condition, but handle the case in which the vector
1185 loop handles exactly VF scalars per iteration. */
1187 static gcond *
1188 vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
1189 class loop *loop, tree niters, tree step,
1190 tree final_iv, bool niters_maybe_zero,
1191 gimple_stmt_iterator loop_cond_gsi)
1193 tree indx_before_incr, indx_after_incr;
1194 gcond *cond_stmt;
1195 gcond *orig_cond;
1196 edge pe = loop_preheader_edge (loop);
1197 gimple_stmt_iterator incr_gsi;
1198 bool insert_after;
1199 enum tree_code code;
1200 tree niters_type = TREE_TYPE (niters);
1202 orig_cond = get_loop_exit_condition (exit_edge);
1203 gcc_assert (orig_cond);
1204 loop_cond_gsi = gsi_for_stmt (orig_cond);
1206 tree init, limit;
1207 if (!niters_maybe_zero && integer_onep (step))
1209 /* In this case we can use a simple 0-based IV:
1212 x = 0;
1216 x += 1;
1218 while (x < NITERS); */
1219 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1220 init = build_zero_cst (niters_type);
1221 limit = niters;
1223 else
1225 /* The following works for all values of NITERS except 0:
1228 x = 0;
1232 x += STEP;
1234 while (x <= NITERS - STEP);
1236 so that the loop continues to iterate if x + STEP - 1 < NITERS
1237 but stops if x + STEP - 1 >= NITERS.
1239 However, if NITERS is zero, x never hits a value above NITERS - STEP
1240 before wrapping around. There are two obvious ways of dealing with
1241 this:
1243 - start at STEP - 1 and compare x before incrementing it
1244 - start at -1 and compare x after incrementing it
1246 The latter is simpler and is what we use. The loop in this case
1247 looks like:
1250 x = -1;
1254 x += STEP;
1256 while (x < NITERS - STEP);
1258 In both cases the loop limit is NITERS - STEP. */
1259 gimple_seq seq = NULL;
1260 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
1261 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
1262 if (seq)
1264 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1265 gcc_assert (!new_bb);
1267 if (niters_maybe_zero)
1269 /* Case C. */
1270 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1271 init = build_all_ones_cst (niters_type);
1273 else
1275 /* Case B. */
1276 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
1277 init = build_zero_cst (niters_type);
1281 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1282 create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
1283 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
1284 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
1285 true, NULL_TREE, true,
1286 GSI_SAME_STMT);
1287 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
1288 true, GSI_SAME_STMT);
1290 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
1291 NULL_TREE);
1293 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1295 /* Record the number of latch iterations. */
1296 if (limit == niters)
1297 /* Case A: the loop iterates NITERS times. Subtract one to get the
1298 latch count. */
1299 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
1300 build_int_cst (niters_type, 1));
1301 else
1302 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
1303 Subtract one from this to get the latch count. */
1304 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
1305 limit, step);
1307 if (final_iv)
1309 gassign *assign;
1310 gcc_assert (single_pred_p (exit_edge->dest));
1311 tree phi_dest
1312 = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
1313 /* Make sure to maintain LC SSA form here and elide the subtraction
1314 if the value is zero. */
1315 gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
1316 add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
1317 if (!integer_zerop (init))
1319 assign = gimple_build_assign (final_iv, MINUS_EXPR,
1320 phi_dest, init);
1321 gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
1322 gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
1326 return cond_stmt;
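/* Illustrative sketch (standalone C++, not part of this file): "case C"
   above as a scalar loop.  Starting the IV at -1 and testing after the
   increment keeps the exit test correct even when NITERS is zero, which in
   the unsigned type denotes 2^precision iterations.  The helper name below
   is hypothetical and uses a fixed 32-bit type.  */
#if 0
#include <cstdint>

static uint32_t
count_iterations (uint32_t niters, uint32_t step)
{
  uint32_t iterations = 0;
  uint32_t x = (uint32_t) -1;          /* init = build_all_ones_cst */
  do
    {
      ++iterations;                    /* one vector iteration's work */
      x += step;
    }
  while (x < niters - step);           /* limit = NITERS - STEP */
  return iterations;
}
/* count_iterations (10, 4) == 2, matching (NITERS - STEP) / STEP + 1; with
   NITERS == 0 the loop would run (1 << 32) / STEP times, as described in
   the comment below (do not actually execute that case).  */
#endif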
1329 /* If we're using fully-masked loops, make LOOP iterate:
1331 N == (NITERS - 1) / STEP + 1
1333 times. When NITERS is zero, this is equivalent to making the loop
1334 execute (1 << M) / STEP times, where M is the precision of NITERS.
1335 NITERS_MAYBE_ZERO is true if this last case might occur.
1337 If we're not using fully-masked loops, make LOOP iterate:
1339 N == (NITERS - STEP) / STEP + 1
1341 times, where NITERS is known to be outside the range [1, STEP - 1].
1342 This is equivalent to making the loop execute NITERS / STEP times
1343 when NITERS is nonzero and (1 << M) / STEP times otherwise.
1344 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
1346 If FINAL_IV is nonnull, it is an SSA name that should be set to
1347 N * STEP on exit from the loop.
1349 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
1351 void
1352 vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
1353 tree niters, tree step, tree final_iv,
1354 bool niters_maybe_zero)
1356 gcond *cond_stmt;
1357 gcond *orig_cond = get_loop_exit_condition (loop_e);
1358 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
1360 if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1362 if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
1363 cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
1364 loop_vinfo,
1365 niters, final_iv,
1366 niters_maybe_zero,
1367 loop_cond_gsi);
1368 else
1369 cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
1370 loop_vinfo,
1371 niters, final_iv,
1372 niters_maybe_zero,
1373 loop_cond_gsi);
1375 else
1376 cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
1377 niters,
1378 step, final_iv,
1379 niters_maybe_zero,
1380 loop_cond_gsi);
1382 /* Remove old loop exit test. */
1383 stmt_vec_info orig_cond_info;
1384 if (loop_vinfo
1385 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
1386 loop_vinfo->remove_stmt (orig_cond_info);
1387 else
1388 gsi_remove (&loop_cond_gsi, true);
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
1392 (gimple *) cond_stmt);
1395 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
1396 For all PHI arguments in FROM->dest and TO->dest from those
1397 edges ensure that TO->dest PHI arguments have current_def
1398 set to that in FROM. */
1400 static void
1401 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
1403 gimple_stmt_iterator gsi_from, gsi_to;
1405 for (gsi_from = gsi_start_phis (from->dest),
1406 gsi_to = gsi_start_phis (to->dest);
1407 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
1409 gimple *from_phi = gsi_stmt (gsi_from);
1410 gimple *to_phi = gsi_stmt (gsi_to);
1411 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
1412 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
1413 if (virtual_operand_p (from_arg))
1415 gsi_next (&gsi_from);
1416 continue;
1418 if (virtual_operand_p (to_arg))
1420 gsi_next (&gsi_to);
1421 continue;
1423 if (TREE_CODE (from_arg) != SSA_NAME)
1424 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
1425 else if (TREE_CODE (to_arg) == SSA_NAME
1426 && from_arg != to_arg)
1428 if (get_current_def (to_arg) == NULL_TREE)
1430 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
1431 TREE_TYPE (get_current_def
1432 (from_arg))));
1433 set_current_def (to_arg, get_current_def (from_arg));
1436 gsi_next (&gsi_from);
1437 gsi_next (&gsi_to);
1440 gphi *from_phi = get_virtual_phi (from->dest);
1441 gphi *to_phi = get_virtual_phi (to->dest);
1442 if (from_phi)
1443 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
1444 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
1447 /* Given LOOP this function generates a new copy of it and puts it
1448 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1449 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1450 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1451 entry or exit of LOOP. If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
1452 continuation. This is correct for cases where one loop continues from the
1453 other like in the vectorizer, but not true for uses in e.g. loop distribution
1454 where the contents of the loop body are split but the iteration space of both
1455 copies remains the same.
1457 If UPDATED_DOMS is not NULL it is updated with the list of basic blocks whose
1458 dominators were updated during the peeling. */
1460 class loop *
1461 slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
1462 class loop *scalar_loop,
1463 edge scalar_exit, edge e, edge *new_e,
1464 bool flow_loops)
1466 class loop *new_loop;
1467 basic_block *new_bbs, *bbs, *pbbs;
1468 bool at_exit;
1469 bool was_imm_dom;
1470 basic_block exit_dest;
1471 edge exit, new_exit;
1472 bool duplicate_outer_loop = false;
1474 exit = loop_exit;
1475 at_exit = (e == exit);
1476 if (!at_exit && e != loop_preheader_edge (loop))
1477 return NULL;
1479 if (scalar_loop == NULL)
1481 scalar_loop = loop;
1482 scalar_exit = loop_exit;
1484 else if (scalar_loop == loop)
1485 scalar_exit = loop_exit;
1486 else
1488 /* Loop has been versioned; match the exits up using the aux index. */
1489 for (edge exit : get_loop_exit_edges (scalar_loop))
1490 if (exit->aux == loop_exit->aux)
1492 scalar_exit = exit;
1493 break;
1496 gcc_assert (scalar_exit);
1499 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1500 pbbs = bbs + 1;
1501 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1502 /* Allow duplication of outer loops. */
1503 if (scalar_loop->inner)
1504 duplicate_outer_loop = true;
1506 /* Generate new loop structure. */
1507 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1508 duplicate_subloops (scalar_loop, new_loop);
1510 exit_dest = exit->dest;
1511 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1512 exit_dest) == loop->header ?
1513 true : false);
1515 /* Also copy the pre-header, this avoids jumping through hoops to
1516 duplicate the loop entry PHI arguments. Create an empty
1517 pre-header unconditionally for this. */
1518 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1519 edge entry_e = single_pred_edge (preheader);
1520 bbs[0] = preheader;
1521 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1523 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1524 &scalar_exit, 1, &new_exit, NULL,
1525 at_exit ? loop->latch : e->src, true);
1526 exit = loop_exit;
1527 basic_block new_preheader = new_bbs[0];
1529 gcc_assert (new_exit);
1531 /* Record the new loop exit information. new_loop doesn't have SCEV data and
1532 so we must initialize the exit information. */
1533 if (new_e)
1534 *new_e = new_exit;
1536 /* Before installing PHI arguments make sure that the edges
1537 into them match that of the scalar loop we analyzed. This
1538 makes sure the SLP tree matches up between the main vectorized
1539 loop and the epilogue vectorized copies. */
1540 if (single_succ_edge (preheader)->dest_idx
1541 != single_succ_edge (new_bbs[0])->dest_idx)
1543 basic_block swap_bb = new_bbs[1];
1544 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1545 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1546 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1547 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1549 if (duplicate_outer_loop)
1551 class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
1552 if (loop_preheader_edge (scalar_loop)->dest_idx
1553 != loop_preheader_edge (new_inner_loop)->dest_idx)
1555 basic_block swap_bb = new_inner_loop->header;
1556 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1557 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1558 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1559 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1563 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1565 /* Skip new preheader since it's deleted if copy loop is added at entry. */
1566 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1567 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1569 /* Rename the exit uses. */
1570 for (edge exit : get_loop_exit_edges (new_loop))
1571 for (auto gsi = gsi_start_phis (exit->dest);
1572 !gsi_end_p (gsi); gsi_next (&gsi))
1574 tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
1575 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
1576 if (MAY_HAVE_DEBUG_BIND_STMTS)
1577 adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
1580 /* This condition happens when the loop has been versioned, e.g. due to ifcvt
1581 versioning the loop. */
1582 if (scalar_loop != loop)
1584 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1585 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1586 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1587 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1588 header) to have current_def set, so copy them over. */
1589 slpeel_duplicate_current_defs_from_edges (scalar_exit, exit);
1590 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1592 EDGE_SUCC (loop->latch, 0));
1595 auto loop_exits = get_loop_exit_edges (loop);
1596 auto_vec<basic_block> doms;
1598 if (at_exit) /* Add the loop copy at exit. */
1600 if (scalar_loop != loop && new_exit->dest != exit_dest)
1602 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1603 flush_pending_stmts (new_exit);
1606 auto_vec <gimple *> new_phis;
1607 hash_map <tree, tree> new_phi_args;
1608 /* First create the empty phi nodes so that when we flush the
1609 statements they can be filled in. However because there is no order
1610 between the PHI nodes in the exits and the loop headers we need to
1611 order them based on the order of the two headers. First record the new
1612 phi nodes. */
1613 for (auto gsi_from = gsi_start_phis (scalar_exit->dest);
1614 !gsi_end_p (gsi_from); gsi_next (&gsi_from))
1616 gimple *from_phi = gsi_stmt (gsi_from);
1617 tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1618 gphi *res = create_phi_node (new_res, new_preheader);
1619 new_phis.safe_push (res);
1622 /* Then redirect the edges and flush the changes. This writes out the new
1623 SSA names. */
1624 for (edge exit : loop_exits)
1626 edge temp_e = redirect_edge_and_branch (exit, new_preheader);
1627 flush_pending_stmts (temp_e);
1629 /* Record the new SSA names in the cache so that we can skip materializing
1630 them again when we fill in the rest of the LCSSA variables. */
1631 for (auto phi : new_phis)
1633 tree new_arg = gimple_phi_arg (phi, 0)->def;
1635 if (!SSA_VAR_P (new_arg))
1636 continue;
1637 /* If the PHI MEM node dominates the loop then we shouldn't create
1638 a new LC-SSA PHI for it in the intermediate block. */
1639 /* A MEM phi that constitutes a new DEF for the vUSE chain can either
1640 be a .VDEF or a PHI that operates on MEM. And said definition
1641 must not be inside the main loop. Or we must be a parameter.
1642 In the last two cases we may remove a non-MEM PHI node, but since
1643 they dominate both loops the removal is unlikely to cause trouble
1644 as the exits must already be using them. */
1645 if (virtual_operand_p (new_arg)
1646 && (SSA_NAME_IS_DEFAULT_DEF (new_arg)
1647 || !flow_bb_inside_loop_p (loop,
1648 gimple_bb (SSA_NAME_DEF_STMT (new_arg)))))
1650 auto gsi = gsi_for_stmt (phi);
1651 remove_phi_node (&gsi, true);
1652 continue;
1654 new_phi_args.put (new_arg, gimple_phi_result (phi));
1656 if (TREE_CODE (new_arg) != SSA_NAME)
1657 continue;
1658 /* If the PHI node dominates the loop then we shouldn't create
1659 a new LC-SSA PHI for it in the intermediate block. Unless the
1660 loop has been versioned. If it has then we need the PHI
1661 node such that later when the loop guard is added the original
1662 dominating PHI can be found. */
1663 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (new_arg));
1664 if (loop == scalar_loop
1665 && (!def_bb || !flow_bb_inside_loop_p (loop, def_bb)))
1667 auto gsi = gsi_for_stmt (phi);
1668 remove_phi_node (&gsi, true);
1672 /* Copy the current loop LC PHI nodes between the original loop exit
1673 block and the new loop header. This allows us to later split the
1674 preheader block and still find the right LC nodes. */
1675 edge loop_entry = single_succ_edge (new_preheader);
1676 if (flow_loops)
1677 for (auto gsi_from = gsi_start_phis (loop->header),
1678 gsi_to = gsi_start_phis (new_loop->header);
1679 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1680 gsi_next (&gsi_from), gsi_next (&gsi_to))
1682 gimple *from_phi = gsi_stmt (gsi_from);
1683 gimple *to_phi = gsi_stmt (gsi_to);
1684 tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1685 loop_latch_edge (loop));
1687 /* Check if we've already created a new phi node during edge
1688 redirection. If we have, only propagate the value downwards. */
1689 if (tree *res = new_phi_args.get (new_arg))
1691 adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
1692 continue;
1695 tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1696 gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
1698 /* Main loop exit should use the final iter value. */
1699 add_phi_arg (lcssa_phi, new_arg, loop_exit, UNKNOWN_LOCATION);
1701 adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1704 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
1706 if (was_imm_dom || duplicate_outer_loop)
1707 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1709 /* And remove the unnecessary forwarder again. Keep the other
1710 one so we have a proper pre-header for the loop at the exit edge. */
1711 redirect_edge_pred (single_succ_edge (preheader),
1712 single_pred (preheader));
1713 delete_basic_block (preheader);
1714 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1715 loop_preheader_edge (scalar_loop)->src);
1717 else /* Add the copy at entry. */
1719 /* Copy the current loop LC PHI nodes between the original loop exit
1720 block and the new loop header. This allows us to later split the
1721 preheader block and still find the right LC nodes. */
1722 if (flow_loops)
1723 for (auto gsi_from = gsi_start_phis (new_loop->header),
1724 gsi_to = gsi_start_phis (loop->header);
1725 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1726 gsi_next (&gsi_from), gsi_next (&gsi_to))
1728 gimple *from_phi = gsi_stmt (gsi_from);
1729 gimple *to_phi = gsi_stmt (gsi_to);
1730 tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1731 loop_latch_edge (new_loop));
1732 adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
1733 new_arg);
1736 if (scalar_loop != loop)
1738 /* Remove the unnecessary forwarder of scalar_loop again. */
1739 redirect_edge_pred (single_succ_edge (preheader),
1740 single_pred (preheader));
1741 delete_basic_block (preheader);
1742 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1743 loop_preheader_edge (scalar_loop)->src);
1744 preheader = split_edge (loop_preheader_edge (loop));
1745 entry_e = single_pred_edge (preheader);
1748 redirect_edge_and_branch_force (entry_e, new_preheader);
1749 flush_pending_stmts (entry_e);
1750 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1752 redirect_edge_and_branch_force (new_exit, preheader);
1753 flush_pending_stmts (new_exit);
1754 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1756 /* And remove the unnecessary forwarder again. Keep the other
1757 one so we have a proper pre-header for the loop at the exit edge. */
1758 redirect_edge_pred (single_succ_edge (new_preheader),
1759 single_pred (new_preheader));
1760 delete_basic_block (new_preheader);
1761 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1762 loop_preheader_edge (new_loop)->src);
1765 free (new_bbs);
1766 free (bbs);
1768 checking_verify_dominators (CDI_DOMINATORS);
1770 return new_loop;
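/* Editor's note: illustrative sketch, not part of the original sources.  The
   LC-SSA PHIs materialized in NEW_PREHEADER above correspond, at the source
   level, to values that survive the first loop copy and are consumed by the
   second one.  A hypothetical example (all names made up):

     int
     sum_all (const int *a, int n, int n2)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)     // first (e.g. vectorized) copy
         sum += a[i];
       // SUM is live here; in loop-closed SSA it flows through a
       // single-argument PHI in this block, which is what the code above
       // creates in NEW_PREHEADER for every such value.
       for (int i = n; i < n2; i++)    // second (e.g. epilogue) copy
         sum += a[i];
       return sum;
     }
 */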
1774 /* Given the condition expression COND, put it as the last statement of
1775 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1776 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1777 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1778 new edge as irreducible if IRREDUCIBLE_P is true. */
1780 static edge
1781 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1782 basic_block guard_to, basic_block dom_bb,
1783 profile_probability probability, bool irreducible_p)
1785 gimple_stmt_iterator gsi;
1786 edge new_e, enter_e;
1787 gcond *cond_stmt;
1788 gimple_seq gimplify_stmt_list = NULL;
1790 enter_e = EDGE_SUCC (guard_bb, 0);
1791 enter_e->flags &= ~EDGE_FALLTHRU;
1792 enter_e->flags |= EDGE_FALSE_VALUE;
1793 gsi = gsi_last_bb (guard_bb);
1795 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
1796 is_gimple_condexpr_for_cond, NULL_TREE);
1797 if (gimplify_stmt_list)
1798 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1800 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
1801 gsi = gsi_last_bb (guard_bb);
1802 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1804 /* Add new edge to connect guard block to the merge/loop-exit block. */
1805 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
1807 new_e->probability = probability;
1808 if (irreducible_p)
1809 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1811 enter_e->probability = probability.invert ();
1812 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
1814 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1815 if (enter_e->dest->loop_father->header == enter_e->dest)
1816 split_edge (enter_e);
1818 return new_e;
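/* Editor's note: illustrative sketch, not part of the original sources.  At
   the source level the guard added by slpeel_add_loop_guard amounts to a
   conditional jump around the guarded region; e.g. for a prolog-skip guard
   (the function and names below are hypothetical):

     void
     guarded (int *a, unsigned prolog_niters, unsigned n)
     {
       unsigned i = 0;
       if (prolog_niters == 0)           // COND; the new TRUE edge -> GUARD_TO
         goto after_prolog;              // skip edge with probability PROBABILITY
       for (; i < prolog_niters; i++)    // guarded region, entered on the FALSE edge
         a[i] += 1;
      after_prolog:
       for (; i < n; i++)
         a[i] += 2;
     }
 */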
1822 /* This function verifies that the following restrictions apply to LOOP:
1823 (1) it consists of exactly 2 basic blocks - a header and an empty latch -
1824 for an innermost loop, and of 5 basic blocks for an outer loop.
1825 (2) it is single entry, single exit
1826 (3) its exit condition is the last stmt in the header
1827 (4) E is the entry/exit edge of LOOP.
1830 bool
1831 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
1832 const_edge e)
1834 edge entry_e = loop_preheader_edge (loop);
1835 gcond *orig_cond = get_loop_exit_condition (exit_e);
1836 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1837 unsigned int num_bb = loop->inner? 5 : 2;
1839 /* All loops have an outer scope; the only case loop->outer is NULL is for
1840 the function itself. */
1841 if (!loop_outer (loop)
1842 || loop->num_nodes != num_bb
1843 || !empty_block_p (loop->latch)
1844 || !exit_e
1845 /* Verify that new loop exit condition can be trivially modified. */
1846 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1847 || (e != exit_e && e != entry_e))
1848 return false;
1850 basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
1851 get_loop_body_with_size (loop, bbs, loop->num_nodes);
1852 bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
1853 free (bbs);
1854 return ret;
1857 /* Function find_loop_location.
1859 Extract the location of the loop in the source code.
1860 If the loop is not well formed for vectorization, an estimated
1861 location is calculated.
1862 Return the loop location if it succeeds and an unknown location if not. */
1864 dump_user_location_t
1865 find_loop_location (class loop *loop)
1867 gimple *stmt = NULL;
1868 basic_block bb;
1869 gimple_stmt_iterator si;
1871 if (!loop)
1872 return dump_user_location_t ();
1874 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
1876 /* We only care about the loop location, so use any exit with location
1877 information. */
1878 for (edge e : get_loop_exit_edges (loop))
1880 stmt = get_loop_exit_condition (e);
1882 if (stmt
1883 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1884 return stmt;
1888 /* If we got here the loop is probably not "well formed";
1889 try to estimate the loop location. */
1891 if (!loop->header)
1892 return dump_user_location_t ();
1894 bb = loop->header;
1896 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1898 stmt = gsi_stmt (si);
1899 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1900 return stmt;
1903 return dump_user_location_t ();
1906 /* Return true if the phi described by STMT_INFO defines an IV of the
1907 loop to be vectorized. */
1909 static bool
1910 iv_phi_p (stmt_vec_info stmt_info)
1912 gphi *phi = as_a <gphi *> (stmt_info->stmt);
1913 if (virtual_operand_p (PHI_RESULT (phi)))
1914 return false;
1916 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1917 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1918 return false;
1920 return true;
1923 /* Return true if vectorizer can peel for nonlinear iv. */
1924 static bool
1925 vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
1926 stmt_vec_info stmt_info)
1928 enum vect_induction_op_type induction_type
1929 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
1930 tree niters_skip;
1931 /* Init_expr will be updated by vect_update_ivs_after_vectorizer.
1932 If niters or vf is unknown:
1933 For shift, when the shift amount >= precision, there would be
1934 undefined behavior. For mult, we don't know how to generate
1935 init_expr * pow (step, niters) for variable niters.
1936 For neg, it should be ok, since the niters of the vectorized main
1937 loop will always be a multiple of 2. */
1938 if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1939 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
1940 && induction_type != vect_step_op_neg)
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "Peeling for epilogue is not supported"
1945 " for nonlinear induction except neg"
1946 " when iteration count is unknown.\n");
1947 return false;
1950 /* Avoid compile time hog on vect_peel_nonlinear_iv_init. */
1951 if (induction_type == vect_step_op_mul)
1953 tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1954 tree type = TREE_TYPE (step_expr);
1956 if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
1957 && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1961 "Avoid compile time hog on"
1962 " vect_peel_nonlinear_iv_init"
1963 " for nonlinear induction vec_step_op_mul"
1964 " when iteration count is too big.\n");
1965 return false;
1969 /* Also doesn't support peeling for neg when niter is variable.
1970 ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
1971 niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
1972 if ((niters_skip != NULL_TREE
1973 && (TREE_CODE (niters_skip) != INTEGER_CST
1974 || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
1975 || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
1976 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1980 "Peeling for alignement is not supported"
1981 " for nonlinear induction when niters_skip"
1982 " is not constant.\n");
1983 return false;
1986 return true;
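/* Editor's note: illustrative sketch, not part of the original sources.  The
   value a nonlinear IV reaches after K peeled scalar iterations is what
   vect_peel_nonlinear_iv_init must materialize as the epilogue's initial
   value.  A sketch of the two interesting evolutions, assuming wrapping
   unsigned arithmetic:

     unsigned
     nonlinear_iv_after (unsigned init, unsigned step, unsigned k, int negate_p)
     {
       unsigned val = init;
       for (unsigned i = 0; i < k; i++)
         val = negate_p ? -val : val * step;    // neg vs. mul evolution
       return val;
     }

   For neg the closed form is INIT when K is even (and the main vector loop
   always executes a multiple of 2 scalar iterations), so variable NITERS is
   fine.  For mul the closed form is INIT * pow (STEP, K), and for shifts it
   is INIT << (K * STEP), which is undefined once the total shift amount
   reaches the precision; that is why those cases are rejected above when
   NITERS or VF is unknown.  */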
1989 /* Function vect_can_advance_ivs_p
1991 In case the number of iterations that LOOP iterates is unknown at compile
1992 time, an epilog loop will be generated, and the loop induction variables
1993 (IVs) will be "advanced" to the value they are supposed to take just before
1994 the epilog loop. Here we check that the access function of the loop IVs
1995 and the expression that represents the loop bound are simple enough.
1996 These restrictions will be relaxed in the future. */
1998 bool
1999 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2001 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2002 basic_block bb = loop->header;
2003 gphi_iterator gsi;
2005 /* Analyze phi functions of the loop header. */
2007 if (dump_enabled_p ())
2008 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
2009 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2011 tree evolution_part;
2012 enum vect_induction_op_type induction_type;
2014 gphi *phi = gsi.phi ();
2015 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
2018 phi_info->stmt);
2020 /* Skip virtual phi's. The data dependences that are associated with
2021 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
2023 Skip reduction phis. */
2024 if (!iv_phi_p (phi_info))
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_NOTE, vect_location,
2028 "reduc or virtual phi. skip.\n");
2029 continue;
2032 induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2033 if (induction_type != vect_step_op_add)
2035 if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
2036 return false;
2038 continue;
2041 /* Analyze the evolution function. */
2043 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2044 if (evolution_part == NULL_TREE)
2046 if (dump_enabled_p ())
2047 dump_printf (MSG_MISSED_OPTIMIZATION,
2048 "No access function or evolution.\n");
2049 return false;
2052 /* FORNOW: We do not transform initial conditions of IVs
2053 whose evolution functions are not invariant in the loop. */
2055 if (!expr_invariant_in_loop_p (loop, evolution_part))
2057 if (dump_enabled_p ())
2058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059 "evolution not invariant in loop.\n");
2060 return false;
2063 /* FORNOW: We do not transform initial conditions of IVs
2064 whose evolution functions are a polynomial of degree >= 2. */
2066 if (tree_is_chrec (evolution_part))
2068 if (dump_enabled_p ())
2069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2070 "evolution is chrec.\n");
2071 return false;
2075 return true;
2079 /* Function vect_update_ivs_after_vectorizer.
2081 "Advance" the induction variables of LOOP to the value they should take
2082 after the execution of LOOP. This is currently necessary because the
2083 vectorizer does not handle induction variables that are used after the
2084 loop. Such a situation occurs when the last iterations of LOOP are
2085 peeled, because:
2086 1. We introduced new uses after LOOP for IVs that were not originally used
2087 after LOOP: the IVs of LOOP are now used by an epilog loop.
2088 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2089 times, whereas the loop IVs should be bumped N times.
2091 Input:
2092 - LOOP - a loop that is going to be vectorized. The last few iterations
2093 of LOOP were peeled.
2094 - NITERS - the number of iterations that LOOP executes (before it is
2095 vectorized), i.e., the number of times the ivs should be bumped.
2096 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2097 coming out from LOOP on which there are uses of the LOOP ivs
2098 (this is the path from LOOP->exit to epilog_loop->preheader).
2100 The new definitions of the ivs are placed in LOOP->exit.
2101 The phi args associated with the edge UPDATE_E in the bb
2102 UPDATE_E->dest are updated accordingly.
2104 Assumption 1: Like the rest of the vectorizer, this function assumes
2105 a single loop exit that has a single predecessor.
2107 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2108 organized in the same order.
2110 Assumption 3: The access function of the ivs is simple enough (see
2111 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2113 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2114 coming out of LOOP on which the ivs of LOOP are used (this is the path
2115 that leads to the epilog loop; other paths skip the epilog loop). This
2116 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2117 needs to have its phis updated.
2120 static void
2121 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
2122 tree niters, edge update_e)
2124 gphi_iterator gsi, gsi1;
2125 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2126 basic_block update_bb = update_e->dest;
2128 basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
2130 /* Make sure there exists a single-predecessor exit bb: */
2131 gcc_assert (single_pred_p (exit_bb));
2132 gcc_assert (single_succ_edge (exit_bb) == update_e);
2134 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
2135 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
2136 gsi_next (&gsi), gsi_next (&gsi1))
2138 tree init_expr;
2139 tree step_expr, off;
2140 tree type;
2141 tree var, ni, ni_name;
2142 gimple_stmt_iterator last_gsi;
2144 gphi *phi = gsi.phi ();
2145 gphi *phi1 = gsi1.phi ();
2146 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_NOTE, vect_location,
2149 "vect_update_ivs_after_vectorizer: phi: %G",
2150 (gimple *) phi);
2152 /* Skip reduction and virtual phis. */
2153 if (!iv_phi_p (phi_info))
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_NOTE, vect_location,
2157 "reduc or virtual phi. skip.\n");
2158 continue;
2161 type = TREE_TYPE (gimple_phi_result (phi));
2162 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2163 step_expr = unshare_expr (step_expr);
2165 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2166 of degree >= 2 or exponential. */
2167 gcc_assert (!tree_is_chrec (step_expr));
2169 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2170 gimple_seq stmts = NULL;
2171 enum vect_induction_op_type induction_type
2172 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2174 if (induction_type == vect_step_op_add)
2176 tree stype = TREE_TYPE (step_expr);
2177 off = fold_build2 (MULT_EXPR, stype,
2178 fold_convert (stype, niters), step_expr);
2179 if (POINTER_TYPE_P (type))
2180 ni = fold_build_pointer_plus (init_expr, off);
2181 else
2182 ni = fold_convert (type,
2183 fold_build2 (PLUS_EXPR, stype,
2184 fold_convert (stype, init_expr),
2185 off));
2187 /* Don't bother calling vect_peel_nonlinear_iv_init. */
2188 else if (induction_type == vect_step_op_neg)
2189 ni = init_expr;
2190 else
2191 ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
2192 niters, step_expr,
2193 induction_type);
2195 var = create_tmp_var (type, "tmp");
2197 last_gsi = gsi_last_bb (exit_bb);
2198 gimple_seq new_stmts = NULL;
2199 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
2200 /* Exit_bb shouldn't be empty. */
2201 if (!gsi_end_p (last_gsi))
2203 gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
2204 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
2206 else
2208 gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
2209 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
2212 /* Fix phi expressions in the successor bb. */
2213 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
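/* Editor's note: illustrative sketch, not part of the original sources.  For
   the common vect_step_op_add case the value computed in EXIT_BB above is
   simply INIT_EXPR + NITERS * STEP_EXPR (a pointer IV uses a pointer-plus of
   the same byte offset):

     unsigned
     advanced_iv (unsigned init, unsigned step, unsigned niters)
     {
       return init + niters * step;   // value the IV must have on entry to the epilogue
     }
 */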
2217 /* Return a gimple value containing the misalignment (measured in vector
2218 elements) for the loop described by LOOP_VINFO, i.e. how many elements
2219 it is away from a perfectly aligned address. Add any new statements
2220 to SEQ. */
2222 static tree
2223 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
2225 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2226 stmt_vec_info stmt_info = dr_info->stmt;
2227 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2229 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2230 unsigned HOST_WIDE_INT target_align_c;
2231 tree target_align_minus_1;
2233 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2234 size_zero_node) < 0;
2235 tree offset = (negative
2236 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2237 * TREE_INT_CST_LOW
2238 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
2239 : size_zero_node);
2240 tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
2241 stmt_info, seq,
2242 offset);
2243 tree type = unsigned_type_for (TREE_TYPE (start_addr));
2244 if (target_align.is_constant (&target_align_c))
2245 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
2246 else
2248 tree vla = build_int_cst (type, target_align);
2249 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
2250 fold_build2 (MINUS_EXPR, type,
2251 build_int_cst (type, 0), vla));
2252 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
2253 build_int_cst (type, 1));
2256 HOST_WIDE_INT elem_size
2257 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2258 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2260 /* Create: misalign_in_bytes = addr & (target_align - 1). */
2261 tree int_start_addr = fold_convert (type, start_addr);
2262 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
2263 target_align_minus_1);
2265 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
2266 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
2267 elem_size_log);
2269 return misalign_in_elems;
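/* Editor's note: illustrative sketch, not part of the original sources.  The
   expression built above boils down to the following, assuming ELEM_SIZE is a
   power of two and TARGET_ALIGN is a compile-time constant:

     unsigned long
     misalign_in_elems (unsigned long addr, unsigned long target_align,
                        unsigned elem_size_log2)
     {
       unsigned long misalign_in_bytes = addr & (target_align - 1);
       return misalign_in_bytes >> elem_size_log2;
     }

   E.g. ADDR == 0x101c, TARGET_ALIGN == 32 and 4-byte elements gives
   (0x101c & 31) >> 2 == 28 >> 2 == 7 elements.  */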
2272 /* Function vect_gen_prolog_loop_niters
2274 Generate the number of iterations which should be peeled as prolog for the
2275 loop represented by LOOP_VINFO. It is calculated as the misalignment of
2276 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
2277 As a result, after the execution of this loop, the data reference DR will
2278 refer to an aligned location. The following computation is generated:
2280 If the misalignment of DR is known at compile time:
2281 addr_mis = int mis = DR_MISALIGNMENT (dr);
2282 Else, compute address misalignment in bytes:
2283 addr_mis = addr & (target_align - 1)
2285 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
2287 (elem_size = element type size; an element is the scalar element whose type
2288 is the inner type of the vectype)
2290 The computations will be emitted at the end of BB. We also compute and
2291 store the upper bound (inclusive) of the result in BOUND.
2293 When the step of the data-ref in the loop is not 1 (as in interleaved data
2294 and SLP), the number of iterations of the prolog must be divided by the step
2295 (which is equal to the size of the interleaved group).
2297 The above formulas assume that VF == number of elements in the vector. This
2298 may not hold when there are multiple-types in the loop.
2299 In this case, for some data-references in the loop the VF does not represent
2300 the number of elements that fit in the vector. Therefore, instead of VF we
2301 use TYPE_VECTOR_SUBPARTS. */
2303 static tree
2304 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
2305 basic_block bb, int *bound)
2307 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2308 tree var;
2309 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2310 gimple_seq stmts = NULL, new_stmts = NULL;
2311 tree iters, iters_name;
2312 stmt_vec_info stmt_info = dr_info->stmt;
2313 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2314 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2316 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2318 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_NOTE, vect_location,
2322 "known peeling = %d.\n", npeel);
2324 iters = build_int_cst (niters_type, npeel);
2325 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2327 else
2329 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
2330 tree type = TREE_TYPE (misalign_in_elems);
2331 HOST_WIDE_INT elem_size
2332 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2333 /* We only do prolog peeling if the target alignment is known at compile
2334 time. */
2335 poly_uint64 align_in_elems =
2336 exact_div (target_align, elem_size);
2337 tree align_in_elems_minus_1 =
2338 build_int_cst (type, align_in_elems - 1);
2339 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
2341 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
2342 & (align_in_elems - 1)). */
2343 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2344 size_zero_node) < 0;
2345 if (negative)
2346 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
2347 align_in_elems_tree);
2348 else
2349 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
2350 misalign_in_elems);
2351 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
2352 iters = fold_convert (niters_type, iters);
2353 unsigned HOST_WIDE_INT align_in_elems_c;
2354 if (align_in_elems.is_constant (&align_in_elems_c))
2355 *bound = align_in_elems_c - 1;
2356 else
2357 *bound = -1;
2360 if (dump_enabled_p ())
2361 dump_printf_loc (MSG_NOTE, vect_location,
2362 "niters for prolog loop: %T\n", iters);
2364 var = create_tmp_var (niters_type, "prolog_loop_niters");
2365 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
2367 if (new_stmts)
2368 gimple_seq_add_seq (&stmts, new_stmts);
2369 if (stmts)
2371 gcc_assert (single_succ_p (bb));
2372 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2373 if (gsi_end_p (gsi))
2374 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2375 else
2376 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2378 return iters_name;
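/* Editor's note: illustrative sketch, not part of the original sources.  For
   a positive-step data reference the generated prolog iteration count is

     unsigned
     prolog_niters (unsigned misalign_in_elems, unsigned align_in_elems)
     {
       return (align_in_elems - misalign_in_elems) & (align_in_elems - 1);
     }

   e.g. with ALIGN_IN_ELEMS == 8 (32-byte vectors of 4-byte elements) a
   misalignment of 7 elements gives (8 - 7) & 7 == 1 prolog iteration, while a
   misalignment of 0 gives (8 - 0) & 7 == 0, i.e. no peeling; the masking with
   ALIGN_IN_ELEMS - 1 folds the already-aligned case to zero, which is also
   why BOUND is ALIGN_IN_ELEMS - 1.  */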
2382 /* Function vect_update_init_of_dr
2384 If CODE is PLUS, the vector loop starts NITERS iterations after the
2385 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
2386 iterations before the scalar one (using masking to skip inactive
2387 elements). This function updates the information recorded in DR to
2388 account for the difference. Specifically, it updates the OFFSET
2389 field of DR_INFO. */
2391 static void
2392 vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
2394 struct data_reference *dr = dr_info->dr;
2395 tree offset = dr_info->offset;
2396 if (!offset)
2397 offset = build_zero_cst (sizetype);
2399 niters = fold_build2 (MULT_EXPR, sizetype,
2400 fold_convert (sizetype, niters),
2401 fold_convert (sizetype, DR_STEP (dr)));
2402 offset = fold_build2 (code, sizetype,
2403 fold_convert (sizetype, offset), niters);
2404 dr_info->offset = offset;
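/* Editor's note: illustrative sketch, not part of the original sources.  The
   update above is plain byte arithmetic on the recorded start offset of the
   data reference:

     long
     updated_dr_offset (long offset, long niters, long dr_step, int plus_p)
     {
       long delta = niters * dr_step;            // bytes skipped by peeling
       return plus_p ? offset + delta : offset - delta;
     }

   E.g. a 3-iteration prolog over a DR with DR_STEP == 4 moves the vector
   loop's first access forward by 12 bytes (PLUS_EXPR), while masked peeling
   of NITERS_SKIP iterations moves it backwards (MINUS_EXPR).  */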
2408 /* Function vect_update_inits_of_drs
2410 Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
2411 CODE and NITERS are as for vect_update_inits_of_dr. */
2413 void
2414 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
2415 tree_code code)
2417 unsigned int i;
2418 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2419 struct data_reference *dr;
2421 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
2423 /* Adjust niters to sizetype. We used to insert the stmts on the loop
2424 preheader here, but since we might use these niters to update the
2425 epilogue's niters and data references we can't insert them here, as this
2426 definition might not always dominate its uses. */
2427 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
2428 niters = fold_convert (sizetype, niters);
2430 FOR_EACH_VEC_ELT (datarefs, i, dr)
2432 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2433 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
2434 && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
2435 vect_update_init_of_dr (dr_info, niters, code);
2439 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
2440 by masking. This involves calculating the number of iterations to
2441 be peeled and then aligning all memory references appropriately. */
2443 void
2444 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
2446 tree misalign_in_elems;
2447 tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2449 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
2451 /* From the information recorded in LOOP_VINFO get the number of iterations
2452 that need to be skipped via masking. */
2453 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2455 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2456 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
2457 misalign_in_elems = build_int_cst (type, misalign);
2459 else
2461 gimple_seq seq1 = NULL, seq2 = NULL;
2462 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2463 misalign_in_elems = fold_convert (type, misalign_in_elems);
2464 misalign_in_elems = force_gimple_operand (misalign_in_elems,
2465 &seq2, true, NULL_TREE);
2466 gimple_seq_add_seq (&seq1, seq2);
2467 if (seq1)
2469 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2470 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
2471 gcc_assert (!new_bb);
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE, vect_location,
2477 "misalignment for fully-masked loop: %T\n",
2478 misalign_in_elems);
2480 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
2482 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2485 /* This function builds ni_name = number of iterations. Statements
2486 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
2487 it to TRUE if new ssa_var is generated. */
2489 tree
2490 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
2492 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2493 if (TREE_CODE (ni) == INTEGER_CST)
2494 return ni;
2495 else
2497 tree ni_name, var;
2498 gimple_seq stmts = NULL;
2499 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2501 var = create_tmp_var (TREE_TYPE (ni), "niters");
2502 ni_name = force_gimple_operand (ni, &stmts, false, var);
2503 if (stmts)
2505 gsi_insert_seq_on_edge_immediate (pe, stmts);
2506 if (new_var_p != NULL)
2507 *new_var_p = true;
2510 return ni_name;
2514 /* Calculate the number of iterations above which the vectorized loop will
2515 be preferred over the scalar loop. NITERS_PROLOG is the number of
2516 iterations of the prolog loop. If it's an integer constant, the integer
2517 number is also passed in INT_NITERS_PROLOG. BOUND_PROLOG is the upper
2518 bound (inclusive) of the number of iterations of the prolog loop.
2519 BOUND_EPILOG is the corresponding value for the epilog loop. If
2520 CHECK_PROFITABILITY is true, TH is the threshold below which the scalar
2521 (rather than vectorized) loop will be executed. This function stores the
2522 upper bound (inclusive) of the result in BOUND_SCALAR. */
2524 static tree
2525 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
2526 int bound_prolog, poly_int64 bound_epilog, int th,
2527 poly_uint64 *bound_scalar,
2528 bool check_profitability)
2530 tree type = TREE_TYPE (niters_prolog);
2531 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
2532 build_int_cst (type, bound_epilog));
2534 *bound_scalar = bound_prolog + bound_epilog;
2535 if (check_profitability)
2537 /* TH indicates the minimum niters of vectorized loop, while we
2538 compute the maximum niters of scalar loop. */
2539 th--;
2540 /* Peeling for constant times. */
2541 if (int_niters_prolog >= 0)
2543 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2544 return build_int_cst (type, *bound_scalar);
2546 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
2547 and BOUND_EPILOG are inclusive upper bounds. */
2548 if (known_ge (th, bound_prolog + bound_epilog))
2550 *bound_scalar = th;
2551 return build_int_cst (type, th);
2553 /* Need to do runtime comparison. */
2554 else if (maybe_gt (th, bound_epilog))
2556 *bound_scalar = upper_bound (*bound_scalar, th);
2557 return fold_build2 (MAX_EXPR, type,
2558 build_int_cst (type, th), niters);
2561 return niters;
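/* Editor's note: illustrative sketch, not part of the original sources.  The
   value returned above is conceptually the following, ignoring the
   compile-time-constant special cases:

     long
     scalar_loop_niters (long niters_prolog, long bound_epilog, long th,
                         int check_profitability)
     {
       long niters = niters_prolog + bound_epilog;   // always run scalar
       if (check_profitability && th - 1 > niters)
         return th - 1;     // MAX_EXPR against the profitability threshold
       return niters;
     }

   i.e. the iterations peeled into the prolog and epilog always execute in a
   scalar loop, and with profitability checking enabled up to TH - 1
   iterations may run scalar instead of entering the vector loop.  */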
2564 /* NITERS is the number of times that the original scalar loop executes
2565 after peeling. Work out the maximum number of iterations N that can
2566 be handled by the vectorized form of the loop and then either:
2568 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
2570 niters_vector = N
2572 b) set *STEP_VECTOR_PTR to one and generate:
2574 niters_vector = N / vf
2576 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
2577 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
2578 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
2580 void
2581 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
2582 tree *niters_vector_ptr, tree *step_vector_ptr,
2583 bool niters_no_overflow)
2585 tree ni_minus_gap, var;
2586 tree niters_vector, step_vector, type = TREE_TYPE (niters);
2587 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2588 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2589 tree log_vf = NULL_TREE;
2591 /* If epilogue loop is required because of data accesses with gaps, we
2592 subtract one iteration from the total number of iterations here for
2593 correct calculation of RATIO. */
2594 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2596 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
2597 build_one_cst (type));
2598 if (!is_gimple_val (ni_minus_gap))
2600 var = create_tmp_var (type, "ni_gap");
2601 gimple *stmts = NULL;
2602 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
2603 true, var);
2604 gsi_insert_seq_on_edge_immediate (pe, stmts);
2607 else
2608 ni_minus_gap = niters;
2610 /* To silence some unexpected warnings, simply initialize to 0. */
2611 unsigned HOST_WIDE_INT const_vf = 0;
2612 if (vf.is_constant (&const_vf)
2613 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2615 /* Create: niters >> log2(vf) */
2616 /* If it's known that niters == number of latch executions + 1 doesn't
2617 overflow, we can generate niters >> log2(vf); otherwise we generate
2618 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
2619 will be at least one. */
2620 log_vf = build_int_cst (type, exact_log2 (const_vf));
2621 if (niters_no_overflow)
2622 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
2623 else
2624 niters_vector
2625 = fold_build2 (PLUS_EXPR, type,
2626 fold_build2 (RSHIFT_EXPR, type,
2627 fold_build2 (MINUS_EXPR, type,
2628 ni_minus_gap,
2629 build_int_cst (type, vf)),
2630 log_vf),
2631 build_int_cst (type, 1));
2632 step_vector = build_one_cst (type);
2634 else
2636 niters_vector = ni_minus_gap;
2637 step_vector = build_int_cst (type, vf);
2640 if (!is_gimple_val (niters_vector))
2642 var = create_tmp_var (type, "bnd");
2643 gimple_seq stmts = NULL;
2644 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
2645 gsi_insert_seq_on_edge_immediate (pe, stmts);
2646 /* The peeling algorithm guarantees that the vector loop bound is at least
2647 ONE, so we set range information to make the niters analyzer's life
2648 easier. Note the number of latch iterations can be TYPE_MAX_VALUE, so
2649 we have to represent the vector niter as (TYPE_MAX_VALUE + 1) >> log_vf. */
2650 if (stmts != NULL && log_vf)
2652 if (niters_no_overflow)
2654 value_range vr (type,
2655 wi::one (TYPE_PRECISION (type)),
2656 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2657 TYPE_SIGN (type)),
2658 exact_log2 (const_vf),
2659 TYPE_SIGN (type)));
2660 set_range_info (niters_vector, vr);
2662 /* For VF == 1 the vector IV might also overflow so we cannot
2663 assert a minimum value of 1. */
2664 else if (const_vf > 1)
2666 value_range vr (type,
2667 wi::one (TYPE_PRECISION (type)),
2668 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2669 TYPE_SIGN (type))
2670 - (const_vf - 1),
2671 exact_log2 (const_vf), TYPE_SIGN (type))
2672 + 1);
2673 set_range_info (niters_vector, vr);
2677 *niters_vector_ptr = niters_vector;
2678 *step_vector_ptr = step_vector;
2680 return;
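/* Editor's note: illustrative sketch, not part of the original sources.  The
   two expressions built above, for a constant power-of-two VF:

     unsigned
     vector_loop_niters (unsigned niters, unsigned log_vf, int no_overflow)
     {
       if (no_overflow)
         return niters >> log_vf;
       // NITERS (latch count + 1) may have wrapped to 0; subtract VF first
       // and rely on the ratio being at least one.
       return ((niters - (1u << log_vf)) >> log_vf) + 1;
     }

   E.g. NITERS == 0 (i.e. 2^32 scalar iterations of a 32-bit counter) with
   VF == 4 gives ((0u - 4) >> 2) + 1 == 0x40000000 vector iterations, which
   the plain shift would have computed as 0.  */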
2683 /* Given NITERS_VECTOR which is the number of iterations for vectorized
2684 loop specified by LOOP_VINFO after vectorization, compute the number
2685 of iterations before vectorization (niters_vector * vf) and store it
2686 to NITERS_VECTOR_MULT_VF_PTR. */
2688 static void
2689 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2690 tree niters_vector,
2691 tree *niters_vector_mult_vf_ptr)
2693 /* We should be using a step_vector of VF if VF is variable. */
2694 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
2695 tree type = TREE_TYPE (niters_vector);
2696 tree log_vf = build_int_cst (type, exact_log2 (vf));
2697 basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
2699 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2700 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2701 niters_vector, log_vf);
2702 if (!is_gimple_val (niters_vector_mult_vf))
2704 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2705 gimple_seq stmts = NULL;
2706 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2707 &stmts, true, var);
2708 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2709 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2711 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2714 /* LCSSA_PHI is an lcssa phi of the EPILOG loop which is copied from LOOP.
2715 This function searches for the corresponding lcssa phi node in the exit
2716 bb of LOOP, following the LCSSA_EDGE to the exit node. If it is found,
2717 return the phi result; otherwise return NULL_TREE. */
2719 static tree
2720 find_guard_arg (class loop *loop ATTRIBUTE_UNUSED, const_edge loop_e,
2721 class loop *epilog ATTRIBUTE_UNUSED, gphi *lcssa_phi,
2722 int lcssa_edge = 0)
2724 gphi_iterator gsi;
2726 for (gsi = gsi_start_phis (loop_e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2728 gphi *phi = gsi.phi ();
2729 /* Nested loops with multiple exits can have different numbers of PHI
2730 node arguments between the main loop and the epilog, as the epilog
2731 falls through to the second loop. */
2732 if (gimple_phi_num_args (phi) > loop_e->dest_idx)
2734 tree var = PHI_ARG_DEF (phi, loop_e->dest_idx);
2735 if (TREE_CODE (var) != SSA_NAME)
2736 continue;
2737 tree def = get_current_def (var);
2738 if (!def)
2739 continue;
2740 if (operand_equal_p (def,
2741 PHI_ARG_DEF (lcssa_phi, lcssa_edge), 0))
2742 return PHI_RESULT (phi);
2745 return NULL_TREE;
2748 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2749 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2750 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2751 appear like below:
2753 guard_bb:
2754 if (cond)
2755 goto merge_bb;
2756 else
2757 goto skip_loop;
2759 skip_loop:
2760 header_a:
2761 i_1 = PHI<i_0, i_2>;
2763 i_2 = i_1 + 1;
2764 if (cond_a)
2765 goto latch_a;
2766 else
2767 goto exit_a;
2768 latch_a:
2769 goto header_a;
2771 exit_a:
2772 i_5 = PHI<i_2>;
2774 merge_bb:
2775 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2777 update_loop:
2778 header_b:
2779 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2781 i_4 = i_3 + 1;
2782 if (cond_b)
2783 goto latch_b;
2784 else
2785 goto exit_bb;
2786 latch_b:
2787 goto header_b;
2789 exit_bb:
2791 This function creates PHI nodes at merge_bb and replaces the use of i_5
2792 in the update_loop's PHI node with the new PHI's result. */
2794 static void
2795 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
2796 class loop *update_loop,
2797 edge guard_edge, edge merge_edge)
2799 location_t merge_loc, guard_loc;
2800 edge orig_e = loop_preheader_edge (skip_loop);
2801 edge update_e = loop_preheader_edge (update_loop);
2802 gphi_iterator gsi_orig, gsi_update;
2804 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2805 gsi_update = gsi_start_phis (update_loop->header));
2806 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2807 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2809 gphi *orig_phi = gsi_orig.phi ();
2810 gphi *update_phi = gsi_update.phi ();
2812 /* Generate new phi node at merge bb of the guard. */
2813 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2814 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2816 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2817 args in NEW_PHI for these edges. */
2818 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2819 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2820 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2821 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2822 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2823 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2825 /* Update phi in UPDATE_PHI. */
2826 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2830 /* LOOP and EPILOG are two consecutive loops in CFG connected by LOOP_EXIT edge
2831 and EPILOG is copied from LOOP. Function slpeel_add_loop_guard adds guard
2832 skipping from a point between the two loops to the end of EPILOG. Edges
2833 GUARD_EDGE and MERGE_EDGE are the two pred edges of merge_bb at the end of
2834 EPILOG. The CFG looks like:
2836 loop:
2837 header_a:
2838 i_1 = PHI<i_0, i_2>;
2840 i_2 = i_1 + 1;
2841 if (cond_a)
2842 goto latch_a;
2843 else
2844 goto exit_a;
2845 latch_a:
2846 goto header_a;
2848 exit_a:
2850 guard_bb:
2851 if (cond)
2852 goto merge_bb;
2853 else
2854 goto epilog_loop;
2856 ;; fall_through_bb
2858 epilog_loop:
2859 header_b:
2860 i_3 = PHI<i_2, i_4>;
2862 i_4 = i_3 + 1;
2863 if (cond_b)
2864 goto latch_b;
2865 else
2866 goto merge_bb;
2867 latch_b:
2868 goto header_b;
2870 merge_bb:
2871 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2873 exit_bb:
2874 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2876 For each name used outside EPILOG (i.e. for each name that has an lcssa
2877 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2878 args corresponding to GUARD_EDGE and MERGE_EDGE. The arg for MERGE_EDGE
2879 is the arg of the original PHI in exit_bb, the arg for GUARD_EDGE is
2880 defined by LOOP and is found in the exit bb of LOOP. The arg of the
2881 original PHI in exit_bb will also be updated. */
2883 static void
2884 slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
2885 const_edge loop_exit,
2886 edge guard_edge, edge merge_edge)
2888 gphi_iterator gsi;
2889 basic_block merge_bb = guard_edge->dest;
2891 gcc_assert (single_succ_p (merge_bb));
2892 edge e = single_succ_edge (merge_bb);
2893 basic_block exit_bb = e->dest;
2895 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2897 gphi *update_phi = gsi.phi ();
2898 tree old_arg = PHI_ARG_DEF (update_phi, e->dest_idx);
2900 tree merge_arg = NULL_TREE;
2902 /* If the old argument is a SSA_NAME use its current_def. */
2903 if (TREE_CODE (old_arg) == SSA_NAME)
2904 merge_arg = get_current_def (old_arg);
2905 /* If it's a constant or doesn't have a current_def, just use the old
2906 argument. */
2907 if (!merge_arg)
2908 merge_arg = old_arg;
2910 tree guard_arg = find_guard_arg (loop, loop_exit, epilog,
2911 update_phi, e->dest_idx);
2912 /* If the var is live after loop but not a reduction, we simply
2913 use the old arg. */
2914 if (!guard_arg)
2915 guard_arg = old_arg;
2917 /* Create new phi node in MERGE_BB: */
2918 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2919 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2921 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
2922 the two PHI args in merge_phi for these edges. */
2923 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2924 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2926 /* Update the original phi in exit_bb. */
2927 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2931 /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
2932 Return a value that equals:
2934 - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
2935 - SKIP_VALUE when the main loop is skipped. */
2937 tree
2938 vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
2939 tree skip_value)
2941 gcc_assert (loop_vinfo->main_loop_edge);
2943 tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
2944 basic_block bb = loop_vinfo->main_loop_edge->dest;
2945 gphi *new_phi = create_phi_node (phi_result, bb);
2946 add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
2947 UNKNOWN_LOCATION);
2948 add_phi_arg (new_phi, skip_value,
2949 loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
2950 return phi_result;
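/* Editor's note: illustrative sketch, not part of the original sources.  The
   PHI created above is, at the source level, a select between the value the
   main loop produced and the value to use when the main loop was skipped:

     int
     main_loop_result (int main_loop_value, int skip_value, int main_loop_ran)
     {
       return main_loop_ran ? main_loop_value : skip_value;
     }
 */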
2953 /* Function vect_do_peeling.
2955 Input:
2956 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2958 preheader:
2959 LOOP:
2960 header_bb:
2961 loop_body
2962 if (exit_loop_cond) goto exit_bb
2963 else goto header_bb
2964 exit_bb:
2966 - NITERS: The number of iterations of the loop.
2967 - NITERSM1: The number of iterations of the loop's latch.
2968 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2969 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2970 CHECK_PROFITABILITY is true.
2971 Output:
2972 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
2973 iterate after vectorization; see vect_set_loop_condition for details.
2974 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2975 should be set to the number of scalar iterations handled by the
2976 vector loop. The SSA name is only used on exit from the loop.
2978 This function peels prolog and epilog from the loop, adds guards skipping
2979 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2980 would look like:
2982 guard_bb_1:
2983 if (prefer_scalar_loop) goto merge_bb_1
2984 else goto guard_bb_2
2986 guard_bb_2:
2987 if (skip_prolog) goto merge_bb_2
2988 else goto prolog_preheader
2990 prolog_preheader:
2991 PROLOG:
2992 prolog_header_bb:
2993 prolog_body
2994 if (exit_prolog_cond) goto prolog_exit_bb
2995 else goto prolog_header_bb
2996 prolog_exit_bb:
2998 merge_bb_2:
3000 vector_preheader:
3001 VECTOR LOOP:
3002 vector_header_bb:
3003 vector_body
3004 if (exit_vector_cond) goto vector_exit_bb
3005 else goto vector_header_bb
3006 vector_exit_bb:
3008 guard_bb_3:
3009 if (skip_epilog) goto merge_bb_3
3010 else goto epilog_preheader
3012 merge_bb_1:
3014 epilog_preheader:
3015 EPILOG:
3016 epilog_header_bb:
3017 epilog_body
3018 if (exit_epilog_cond) goto merge_bb_3
3019 else goto epilog_header_bb
3021 merge_bb_3:
3023 Note this function peels the prolog and epilog, and adds the guards,
3024 only when necessary.
3025 This function returns the epilogue loop if a decision was made to vectorize
3026 it, otherwise NULL.
3028 The analysis resulting in this epilogue loop's loop_vec_info was performed
3029 in the same vect_analyze_loop call as the main loop's. At that time
3030 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
3031 vectorization factors than the main loop. This list is stored in the main
3032 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
3033 vectorize the epilogue loop for a lower vectorization factor, the
3034 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
3035 updated and linked to the epilogue loop. This is later used to vectorize
3036 the epilogue. The reason the loop_vec_info needs updating is that it was
3037 constructed based on the original main loop, and the epilogue loop is a
3038 copy of this loop, so all links pointing to statements in the original loop
3039 need updating. Furthermore, these loop_vec_infos share the
3040 data_reference's records, which will also need to be updated.
3042 TODO: Guard for prefer_scalar_loop should be emitted along with
3043 versioning conditions if loop versioning is needed. */
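/* Editor's note: illustrative sketch, not part of the original sources.  At
   the source level the peeled structure described above corresponds to the
   following, assuming VF == 4, 4-byte ints, peeling for alignment and that
   pointers fit in unsigned long (all names are hypothetical):

     void
     peeled_copy (int *a, unsigned n)
     {
       unsigned i = 0;
       unsigned misalign = ((unsigned long) a / sizeof (int)) & 3;
       unsigned prolog_niters = (4 - misalign) & 3;  // peel to 16-byte alignment
       if (n >= prolog_niters + 4)       // skip_vector guard: enough iterations?
         {
           for (; i < prolog_niters; i++)        // PROLOG
             a[i] += 1;
           for (; i + 4 <= n; i += 4)            // VECTOR LOOP
             for (unsigned j = 0; j < 4; j++)    // stands for one vector op
               a[i + j] += 1;
         }
       for (; i < n; i++)                        // EPILOG / scalar fallback
         a[i] += 1;
     }
 */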
3046 class loop *
3047 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
3048 tree *niters_vector, tree *step_vector,
3049 tree *niters_vector_mult_vf_var, int th,
3050 bool check_profitability, bool niters_no_overflow,
3051 tree *advance)
3053 edge e, guard_e;
3054 tree type = TREE_TYPE (niters), guard_cond;
3055 basic_block guard_bb, guard_to;
3056 profile_probability prob_prolog, prob_vector, prob_epilog;
3057 int estimated_vf;
3058 int prolog_peeling = 0;
3059 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
3060 /* We currently do not support prolog peeling if the target alignment is not
3061 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
3062 target alignment being constant. */
3063 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3064 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
3065 return NULL;
3067 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
3068 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3070 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3071 poly_uint64 bound_epilog = 0;
3072 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
3073 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
3074 bound_epilog += vf - 1;
3075 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3076 bound_epilog += 1;
3077 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
3078 poly_uint64 bound_scalar = bound_epilog;
3080 if (!prolog_peeling && !epilog_peeling)
3081 return NULL;
3083 /* Before doing any peeling make sure to reset debug binds outside of
3084 the loop referring to defs not in LC SSA. */
3085 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3086 for (unsigned i = 0; i < loop->num_nodes; ++i)
3088 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
3089 imm_use_iterator ui;
3090 gimple *use_stmt;
3091 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
3092 gsi_next (&gsi))
3094 FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
3095 if (gimple_debug_bind_p (use_stmt)
3096 && loop != gimple_bb (use_stmt)->loop_father
3097 && !flow_loop_nested_p (loop,
3098 gimple_bb (use_stmt)->loop_father))
3100 gimple_debug_bind_reset_value (use_stmt);
3101 update_stmt (use_stmt);
3104 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3105 gsi_next (&gsi))
3107 ssa_op_iter op_iter;
3108 def_operand_p def_p;
3109 FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
3110 FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
3111 if (gimple_debug_bind_p (use_stmt)
3112 && loop != gimple_bb (use_stmt)->loop_father
3113 && !flow_loop_nested_p (loop,
3114 gimple_bb (use_stmt)->loop_father))
3116 gimple_debug_bind_reset_value (use_stmt);
3117 update_stmt (use_stmt);
3122 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
3123 estimated_vf = vect_vf_for_cost (loop_vinfo);
3124 if (estimated_vf == 2)
3125 estimated_vf = 3;
3126 prob_prolog = prob_epilog = profile_probability::guessed_always ()
3127 .apply_scale (estimated_vf - 1, estimated_vf);
3129 class loop *prolog, *epilog = NULL;
3130 class loop *first_loop = loop;
3131 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
3133 /* SSA form needs to be up-to-date since we are going to manually
3134 update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
3135 update-SSA state after that, so we have to make sure not to lose any
3136 pending update needs. */
3137 gcc_assert (!need_ssa_update_p (cfun));
3139 /* If we're vectorizing an epilogue loop, we have ensured that the
3140 virtual operand is in SSA form throughout the vectorized main loop.
3141 Normally it is possible to trace the updated
3142 vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
3143 back to scalar-stmt vuses, meaning that the effect of the SSA update
3144 remains local to the main loop. However, there are rare cases in
3145 which the vectorized loop should have vdefs even when the original scalar
3146 loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
3147 introduces clobbers of the temporary vector array, which in turn
3148 needs new vdefs. If the scalar loop doesn't write to memory, these
3149 new vdefs will be the only ones in the vector loop.
3150 We are currently deferring the update of virtual SSA form and the
3151 creation of a virtual PHI for this case so we do not have to make sure
3152 the newly introduced virtual def is in LCSSA form. */
3154 if (MAY_HAVE_DEBUG_BIND_STMTS)
3156 gcc_assert (!adjust_vec.exists ());
3157 adjust_vec.create (32);
3159 initialize_original_copy_tables ();
3161 /* Record the anchor bb at which the guard should be placed if the scalar
3162 loop might be preferred. */
3163 basic_block anchor = loop_preheader_edge (loop)->src;
3165 /* Generate the number of iterations for the prolog loop. We do this here
3166 so that we can also get the upper bound on the number of iterations. */
3167 tree niters_prolog;
3168 int bound_prolog = 0;
3169 if (prolog_peeling)
3171 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
3172 &bound_prolog);
3173 /* If alignment peeling is known, we will always execute the prolog. */
3174 if (TREE_CODE (niters_prolog) == INTEGER_CST)
3175 prob_prolog = profile_probability::always ();
3177 else
3178 niters_prolog = build_int_cst (type, 0);
3180 loop_vec_info epilogue_vinfo = NULL;
3181 if (vect_epilogues)
3183 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
3184 loop_vinfo->epilogue_vinfos.ordered_remove (0);
3187 tree niters_vector_mult_vf = NULL_TREE;
3188 /* Save NITERS before the loop, as it may be changed by the prologue. */
3189 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
3190 edge update_e = NULL, skip_e = NULL;
3191 unsigned int lowest_vf = constant_lower_bound (vf);
3192 /* Prolog loop may be skipped. */
3193 bool skip_prolog = (prolog_peeling != 0);
3194 /* Skip over this loop to the epilog when there are not enough iterations
3195 to enter the vectorized loop, i.e. perform runtime checks on NITERS to
3196 decide whether to skip the current vectorized loop. If we know the
3197 number of scalar iterations we only add such a runtime check when that
3198 number may be smaller than the number of iterations required to enter
3199 the vectorized loop; for this we use the upper bounds on the prolog and
3200 epilog peeling. When we don't know the number of iterations and don't
3201 require versioning it is because we have asserted that there are enough
3202 scalar iterations to enter the main loop, so this skip is not necessary.
3203 When we are versioning then we only add such a skip if we have chosen
3204 to vectorize the epilogue. */
3206 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3207 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
3208 bound_prolog + bound_epilog)
3209 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
3210 || vect_epilogues));
3211 /* Epilog loop must be executed if the number of iterations for epilog
3212 loop is known at compile time, otherwise we need to add a check at
3213 the end of vector loop and skip to the end of epilog loop. */
3214 bool skip_epilog = (prolog_peeling < 0
3215 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3216 || !vf.is_constant ());
3217 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
3218 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3219 skip_epilog = false;
3221 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3222 auto_vec<profile_count> original_counts;
3223 basic_block *original_bbs = NULL;
3225 if (skip_vector)
3227 split_edge (loop_preheader_edge (loop));
3229 if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
3231 original_bbs = get_loop_body (loop);
3232 for (unsigned int i = 0; i < loop->num_nodes; i++)
3233 original_counts.safe_push(original_bbs[i]->count);
3236 /* Due to the order in which we peel prolog and epilog, we first
3237 propagate probability to the whole loop. The purpose is to
3238 avoid adjusting probabilities of both prolog and vector loops
3239 separately. Note in this case, the probability of epilog loop
3240 needs to be scaled back later. */
3241 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
3242 if (prob_vector.initialized_p ())
3244 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
3245 scale_loop_profile (loop, prob_vector, -1);
3249 if (vect_epilogues)
3251 /* Make sure to set the epilogue's epilogue scalar loop, such that we can
3252 use the original scalar loop as remaining epilogue if necessary. */
3253 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
3254 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3255 LOOP_VINFO_SCALAR_IV_EXIT (epilogue_vinfo)
3256 = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3259 if (prolog_peeling)
3261 e = loop_preheader_edge (loop);
3262 edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3263 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e));
3265 /* Peel prolog and put it on preheader edge of loop. */
3266 edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3267 edge prolog_e = NULL;
3268 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
3269 scalar_loop, scalar_e,
3270 e, &prolog_e);
3271 gcc_assert (prolog);
3272 prolog->force_vectorize = false;
3274 first_loop = prolog;
3275 reset_original_copy_tables ();
3277 /* Update the number of iterations for prolog loop. */
3278 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
3279 vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
3280 step_prolog, NULL_TREE, false);
3282 /* Skip the prolog loop. */
3283 if (skip_prolog)
3285 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3286 niters_prolog, build_int_cst (type, 0));
3287 guard_bb = loop_preheader_edge (prolog)->src;
3288 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
3289 guard_to = split_edge (loop_preheader_edge (loop));
3290 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3291 guard_to, guard_bb,
3292 prob_prolog.invert (),
3293 irred_flag);
3294 e = EDGE_PRED (guard_to, 0);
3295 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3296 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3298 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
3299 scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
3302 /* Update init address of DRs. */
3303 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3304 /* Update niters for vector loop. */
3305 LOOP_VINFO_NITERS (loop_vinfo)
3306 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
3307 LOOP_VINFO_NITERSM1 (loop_vinfo)
3308 = fold_build2 (MINUS_EXPR, type,
3309 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3310 bool new_var_p = false;
3311 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
3312 /* It's guaranteed that the vector loop bound before vectorization is at
3313 least VF, so set range information for the newly generated var. */
3314 if (new_var_p)
3316 value_range vr (type,
3317 wi::to_wide (build_int_cst (type, lowest_vf)),
3318 wi::to_wide (TYPE_MAX_VALUE (type)));
3319 set_range_info (niters, vr);
3322 /* Prolog iterates at most bound_prolog times, latch iterates at
3323 most bound_prolog - 1 times. */
3324 record_niter_bound (prolog, bound_prolog - 1, false, true);
3325 delete_update_ssa ();
3326 adjust_vec_debug_stmts ();
3327 scev_reset ();
3329 basic_block bb_before_epilog = NULL;
3331 if (epilog_peeling)
3333 e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3334 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
3336 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
3337 said epilog then we should use a copy of the main loop as a starting
3338 point. This loop may have already had some preliminary transformations
3339 to allow for more optimal vectorization, for example if-conversion.
3340 If we are not vectorizing the epilog then we should use the scalar loop
3341 as the transformations mentioned above make less or no sense when not
3342 vectorizing. */
3343 edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3344 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
3345 edge epilog_e = vect_epilogues ? e : scalar_e;
3346 edge new_epilog_e = NULL;
3347 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog,
3348 epilog_e, e,
3349 &new_epilog_e);
3350 LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e;
3351 gcc_assert (epilog);
3352 epilog->force_vectorize = false;
3353 bb_before_epilog = loop_preheader_edge (epilog)->src;
3355 /* The scalar version of the loop may be preferred. In this case, add a
3356 guard and skip to the epilog. Note this only happens when the number of
3357 iterations of the loop is unknown at compile time; otherwise this
3358 won't be vectorized. */
3359 if (skip_vector)
3361 /* Additional epilogue iteration is peeled if gap exists. */
3362 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
3363 bound_prolog, bound_epilog,
3364 th, &bound_scalar,
3365 check_profitability);
3366 /* Build guard against NITERSM1 since NITERS may overflow. */
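/* NITERS can wrap to zero when the original loop iterates over the whole
range of its type, whereas NITERSM1 is always representable, hence the
guard is phrased in terms of NITERSM1.  */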
3367 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
3368 guard_bb = anchor;
3369 guard_to = split_edge (loop_preheader_edge (epilog));
3370 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3371 guard_to, guard_bb,
3372 prob_vector.invert (),
3373 irred_flag);
3374 skip_e = guard_e;
3375 e = EDGE_PRED (guard_to, 0);
3376 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3377 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3379 /* Simply propagate profile info from guard_bb to guard_to which is
3380 a merge point of control flow. */
3381 profile_count old_count = guard_to->count;
3382 guard_to->count = guard_bb->count;
3384 /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
3385 if (vect_epilogues || scalar_loop == NULL)
3387 gcc_assert (epilog->num_nodes == loop->num_nodes);
3388 basic_block *bbs = get_loop_body (epilog);
3389 for (unsigned int i = 0; i < epilog->num_nodes; i++)
3391 gcc_assert (get_bb_original (bbs[i]) == original_bbs[i]);
3392 bbs[i]->count = original_counts[i];
3394 free (bbs);
3395 free (original_bbs);
3397 else if (old_count.nonzero_p ())
3398 scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
3400 /* Only need to handle basic block before epilog loop if it's not
3401 the guard_bb, which is the case when skip_vector is true. */
3402 if (guard_bb != bb_before_epilog)
3403 bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
3404 bb_before_epilog = loop_preheader_edge (epilog)->src;
3406 /* If the loop is peeled a non-zero constant number of times, NITERS now
3407 refers to orig_niters - prolog_peeling; it won't overflow even if
3408 orig_niters overflows. */
3409 niters_no_overflow |= (prolog_peeling > 0);
3410 vect_gen_vector_loop_niters (loop_vinfo, niters,
3411 niters_vector, step_vector,
3412 niters_no_overflow);
3413 if (!integer_onep (*step_vector))
3415 /* On exit from the loop we will have an easy way of calculating
3416 NITERS_VECTOR / STEP * STEP. Install a dummy definition
3417 until then. */
3418 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
3419 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
3420 *niters_vector_mult_vf_var = niters_vector_mult_vf;
3422 else
3423 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
3424 &niters_vector_mult_vf);
3425 /* Update IVs of original loop as if they were advanced by
3426 niters_vector_mult_vf steps. */
3427 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
3428 update_e = skip_vector ? e : loop_preheader_edge (epilog);
3429 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
3430 update_e);
3432 if (skip_epilog)
3434 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3435 niters, niters_vector_mult_vf);
3436 guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
3437 edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3438 guard_to = split_edge (epilog_e);
3439 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
3440 skip_vector ? anchor : guard_bb,
3441 prob_epilog.invert (),
3442 irred_flag);
3443 if (vect_epilogues)
3444 epilogue_vinfo->skip_this_loop_edge = guard_e;
3445 edge main_iv = LOOP_VINFO_IV_EXIT (loop_vinfo);
3446 slpeel_update_phi_nodes_for_guard2 (loop, epilog, main_iv, guard_e,
3447 epilog_e);
3448 /* Only need to handle basic block before epilog loop if it's not
3449 the guard_bb, which is the case when skip_vector is true. */
3450 if (guard_bb != bb_before_epilog)
3452 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
3454 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
3456 scale_loop_profile (epilog, prob_epilog, -1);
3459 unsigned HOST_WIDE_INT bound;
3460 if (bound_scalar.is_constant (&bound))
3462 gcc_assert (bound != 0);
3463 /* -1 to convert loop iterations to latch iterations. */
3464 record_niter_bound (epilog, bound - 1, false, true);
3465 scale_loop_profile (epilog, profile_probability::always (),
3466 bound - 1);
3469 delete_update_ssa ();
3470 adjust_vec_debug_stmts ();
3471 scev_reset ();
3474 if (vect_epilogues)
3476 epilog->aux = epilogue_vinfo;
3477 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
3478 LOOP_VINFO_IV_EXIT (epilogue_vinfo)
3479 = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3481 loop_constraint_clear (epilog, LOOP_C_INFINITE);
3483 /* We now must calculate the number of iterations (NITERS) performed by the
3484 previous loop and the number (EPILOGUE_NITERS) to be performed by the epilogue. */
3485 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
3486 niters_prolog, niters_vector_mult_vf);
3488 /* If skip_vector we may skip the previous loop, we insert a phi-node to
3489 determine whether we are coming from the previous vectorized loop
3490 using the update_e edge or the skip_vector basic block using the
3491 skip_e edge. */
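/* Roughly: NITERS = PHI <niters_prolog + niters_vector_mult_vf (update_e),
0 (skip_e)>.  */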
3492 if (skip_vector)
3494 gcc_assert (update_e != NULL && skip_e != NULL);
3495 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
3496 update_e->dest);
3497 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
3498 gimple *stmt = gimple_build_assign (new_ssa, niters);
3499 gimple_stmt_iterator gsi;
3500 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
3501 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
3503 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
3504 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
3506 else
3508 gsi = gsi_last_bb (update_e->src);
3509 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3512 niters = new_ssa;
3513 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
3514 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
3515 UNKNOWN_LOCATION);
3516 niters = PHI_RESULT (new_phi);
3517 epilogue_vinfo->main_loop_edge = update_e;
3518 epilogue_vinfo->skip_main_loop_edge = skip_e;
3521 /* Set ADVANCE to the number of iterations performed by the previous
3522 loop and its prologue. */
3523 *advance = niters;
3525 /* Subtract the number of iterations performed by the vectorized loop
3526 from the number of total iterations. */
3527 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
3528 before_loop_niters,
3529 niters);
3531 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
3532 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
3533 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
3534 epilogue_niters,
3535 build_one_cst (TREE_TYPE (epilogue_niters)));
3537 /* Decide what to do if the number of epilogue iterations is not
3538 a multiple of the epilogue loop's vectorization factor.
3539 We should have rejected the loop during the analysis phase
3540 if this fails. */
3541 bool res = vect_determine_partial_vectors_and_peeling (epilogue_vinfo);
3542 gcc_assert (res);
3545 adjust_vec.release ();
3546 free_original_copy_tables ();
3548 return vect_epilogues ? epilog : NULL;
3551 /* Function vect_create_cond_for_niters_checks.
3553 Create a conditional expression that represents the run-time checks for
3554 loop's niter. The loop is guaranteed to terminate if the run-time
3555 checks hold.
3557 Input:
3558 COND_EXPR - input conditional expression. New conditions will be chained
3559 with logical AND operation.
3561 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
3562 assumptions to be checked.
3564 Output:
3565 COND_EXPR - conditional expression.
3567 The returned COND_EXPR is the conditional expression to be used in the
3568 if statement that controls which version of the loop gets executed at
3569 runtime. */
3571 static void
3572 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3574 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
3576 if (*cond_expr)
3577 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3578 *cond_expr, part_cond_expr);
3579 else
3580 *cond_expr = part_cond_expr;
3583 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3584 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
3586 static void
3587 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
3589 if (*cond_expr)
3590 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3591 *cond_expr, part_cond_expr);
3592 else
3593 *cond_expr = part_cond_expr;
3596 /* Function vect_create_cond_for_align_checks.
3598 Create a conditional expression that represents the alignment checks for
3599 all of the data references (array element references) whose alignment must be
3600 checked at runtime.
3602 Input:
3603 COND_EXPR - input conditional expression. New conditions will be chained
3604 with logical AND operation.
3605 LOOP_VINFO - two fields of the loop information are used.
3606 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3607 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3609 Output:
3610 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3611 expression.
3612 The returned value is the conditional expression to be used in the if
3613 statement that controls which version of the loop gets executed at runtime.
3615 The algorithm makes two assumptions:
3616 1) The number of bytes "n" in a vector is a power of 2.
3617 2) An address "a" is aligned if a%n is zero and this
3618 test can be done as a&(n-1) == 0. For example, for 16
3619 byte vectors the test is a&0xf == 0. */
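/* For two data references and 16-byte vectors the generated sequence is
roughly the following (the temporary names are only illustrative):

addr2int0 = (signed ptr-size int) &dr_0;
addr2int1 = (signed ptr-size int) &dr_1;
orptrs1 = addr2int0 | addr2int1;
andmask = orptrs1 & 0xf;
cond = (andmask == 0);   // true iff all checked addresses are aligned.  */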
3621 static void
3622 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3623 tree *cond_expr,
3624 gimple_seq *cond_expr_stmt_list)
3626 const vec<stmt_vec_info> &may_misalign_stmts
3627 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3628 stmt_vec_info stmt_info;
3629 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3630 tree mask_cst;
3631 unsigned int i;
3632 tree int_ptrsize_type;
3633 char tmp_name[20];
3634 tree or_tmp_name = NULL_TREE;
3635 tree and_tmp_name;
3636 gimple *and_stmt;
3637 tree ptrsize_zero;
3638 tree part_cond_expr;
3640 /* Check that mask is one less than a power of 2, i.e., mask is
3641 all zeros followed by all ones. */
3642 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
3644 int_ptrsize_type = signed_type_for (ptr_type_node);
3646 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
3647 of the first vector of the i'th data reference. */
3649 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3651 gimple_seq new_stmt_list = NULL;
3652 tree addr_base;
3653 tree addr_tmp_name;
3654 tree new_or_tmp_name;
3655 gimple *addr_stmt, *or_stmt;
3656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3657 bool negative = tree_int_cst_compare
3658 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
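/* For a negative step the first vector access starts (nunits - 1) elements
below the DR's base address, so bias the address we check accordingly.  */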
3659 tree offset = negative
3660 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
3661 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
3662 : size_zero_node;
3664 /* create: addr_tmp = (int)(address_of_first_vector) */
3665 addr_base =
3666 vect_create_addr_base_for_vector_ref (loop_vinfo,
3667 stmt_info, &new_stmt_list,
3668 offset);
3669 if (new_stmt_list != NULL)
3670 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3672 sprintf (tmp_name, "addr2int%d", i);
3673 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3674 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3675 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3677 /* The addresses are ORed together. */
3679 if (or_tmp_name != NULL_TREE)
3681 /* create: or_tmp = or_tmp | addr_tmp */
3682 sprintf (tmp_name, "orptrs%d", i);
3683 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3684 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3685 or_tmp_name, addr_tmp_name);
3686 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3687 or_tmp_name = new_or_tmp_name;
3689 else
3690 or_tmp_name = addr_tmp_name;
3692 } /* end for i */
3694 mask_cst = build_int_cst (int_ptrsize_type, mask);
3696 /* create: and_tmp = or_tmp & mask */
3697 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3699 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3700 or_tmp_name, mask_cst);
3701 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3703 /* Make and_tmp the left operand of the conditional test against zero.
3704 If and_tmp has a nonzero bit then some address is unaligned. */
3705 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3706 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
3707 and_tmp_name, ptrsize_zero);
3708 chain_cond_expr (cond_expr, part_cond_expr);
3711 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
3712 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
3713 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3714 and this new condition are true. Treat a null *COND_EXPR as "true". */
3716 static void
3717 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
3719 const vec<vec_object_pair> &pairs
3720 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3721 unsigned int i;
3722 vec_object_pair *pair;
3723 FOR_EACH_VEC_ELT (pairs, i, pair)
3725 tree addr1 = build_fold_addr_expr (pair->first);
3726 tree addr2 = build_fold_addr_expr (pair->second);
3727 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
3728 addr1, addr2);
3729 chain_cond_expr (cond_expr, part_cond_expr);
3733 /* Create an expression that is true when all lower-bound conditions for
3734 the vectorized loop are met. Chain this condition with *COND_EXPR. */
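/* For a signed EXPR with required magnitude MIN (= lower_bounds[i].min_value)
the test built below is (unsigned) EXPR + (MIN - 1) >= 2 * MIN - 1, which
holds exactly when EXPR <= -MIN or EXPR >= MIN; e.g. for MIN == 4 it is
(unsigned) EXPR + 3 >= 7.  */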
3736 static void
3737 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
3739 const vec<vec_lower_bound> &lower_bounds
3740 = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3741 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3743 tree expr = lower_bounds[i].expr;
3744 tree type = unsigned_type_for (TREE_TYPE (expr));
3745 expr = fold_convert (type, expr);
3746 poly_uint64 bound = lower_bounds[i].min_value;
3747 if (!lower_bounds[i].unsigned_p)
3749 expr = fold_build2 (PLUS_EXPR, type, expr,
3750 build_int_cstu (type, bound - 1));
3751 bound += bound - 1;
3753 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
3754 build_int_cstu (type, bound));
3755 chain_cond_expr (cond_expr, part_cond_expr);
3759 /* Function vect_create_cond_for_alias_checks.
3761 Create a conditional expression that represents the run-time checks for
3762 overlapping of address ranges represented by a list of data references
3763 relations passed as input.
3765 Input:
3766 COND_EXPR - input conditional expression. New conditions will be chained
3767 with logical AND operation. If it is NULL, then the function
3768 is used to return the number of alias checks.
3769 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
3770 to be checked.
3772 Output:
3773 COND_EXPR - conditional expression.
3775 The returned COND_EXPR is the conditional expression to be used in the if
3776 statement that controls which version of the loop gets executed at runtime.
3779 void
3780 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3782 const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
3783 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3785 if (comp_alias_ddrs.is_empty ())
3786 return;
3788 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
3789 &comp_alias_ddrs, cond_expr);
3790 if (dump_enabled_p ())
3791 dump_printf_loc (MSG_NOTE, vect_location,
3792 "created %u versioning for alias checks.\n",
3793 comp_alias_ddrs.length ());
3797 /* Function vect_loop_versioning.
3799 If the loop has data references that may or may not be aligned and/or
3800 has data reference relations whose independence was not proven then
3801 two versions of the loop need to be generated, one which is vectorized
3802 and one which isn't. A test is then generated to control which of the
3803 loops is executed. The test checks for the alignment of all of the
3804 data references that may or may not be aligned. An additional
3805 sequence of runtime tests is generated for each pair of DDRs whose
3806 independence was not proven. The vectorized version of the loop is
3807 executed only if both alias and alignment tests are passed.
3809 The test generated to check which version of loop is executed
3810 is modified to also check for profitability as indicated by the
3811 cost model threshold TH.
3813 The versioning precondition(s) are placed in *COND_EXPR and
3814 *COND_EXPR_STMT_LIST. */
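/* In rough pseudo-code the result of versioning is

if (cost-model check && versioning checks)
  vectorized loop
else
  scalar (unvectorized) loop

where the versioning checks are built from the alignment, alias, niter and
simd-if conditions requested for LOOP_VINFO.  */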
3816 class loop *
3817 vect_loop_versioning (loop_vec_info loop_vinfo,
3818 gimple *loop_vectorized_call)
3820 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
3821 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3822 basic_block condition_bb;
3823 gphi_iterator gsi;
3824 gimple_stmt_iterator cond_exp_gsi;
3825 basic_block merge_bb;
3826 basic_block new_exit_bb;
3827 edge new_exit_e, e;
3828 gphi *orig_phi, *new_phi;
3829 tree cond_expr = NULL_TREE;
3830 gimple_seq cond_expr_stmt_list = NULL;
3831 tree arg;
3832 profile_probability prob = profile_probability::likely ();
3833 gimple_seq gimplify_stmt_list = NULL;
3834 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
3835 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3836 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
3837 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
3838 poly_uint64 versioning_threshold
3839 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3840 tree version_simd_if_cond
3841 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
3842 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
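/* SCALAR_LOOP_ITERS is the loop's NITERSM1, so the threshold tests below
compare against TH - 1 and VERSIONING_THRESHOLD - 1, i.e. they hold when
niters >= TH and niters >= VERSIONING_THRESHOLD respectively.  */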
3844 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
3845 && !ordered_p (th, versioning_threshold))
3846 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3847 build_int_cst (TREE_TYPE (scalar_loop_iters),
3848 th - 1));
3849 if (maybe_ne (versioning_threshold, 0U))
3851 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3852 build_int_cst (TREE_TYPE (scalar_loop_iters),
3853 versioning_threshold - 1));
3854 if (cond_expr)
3855 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3856 expr, cond_expr);
3857 else
3858 cond_expr = expr;
3861 tree cost_name = NULL_TREE;
3862 profile_probability prob2 = profile_probability::always ();
3863 if (cond_expr
3864 && EXPR_P (cond_expr)
3865 && (version_niter
3866 || version_align
3867 || version_alias
3868 || version_simd_if_cond))
3870 cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3871 &cond_expr_stmt_list,
3872 is_gimple_val, NULL_TREE);
3873 /* Split prob () into two so that the overall probability of passing
3874 both the cost-model and versioning checks is the orig prob. */
3875 prob2 = prob = prob.sqrt ();
3878 if (version_niter)
3879 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3881 if (cond_expr)
3883 gimple_seq tem = NULL;
3884 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3885 &tem, is_gimple_condexpr_for_cond,
3886 NULL_TREE);
3887 gimple_seq_add_seq (&cond_expr_stmt_list, tem);
3890 if (version_align)
3891 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3892 &cond_expr_stmt_list);
3894 if (version_alias)
3896 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
3897 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
3898 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3901 if (version_simd_if_cond)
3903 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3904 if (flag_checking)
3905 if (basic_block bb
3906 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3907 gcc_assert (bb != loop->header
3908 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3909 && (scalar_loop == NULL
3910 || (bb != scalar_loop->header
3911 && dominated_by_p (CDI_DOMINATORS,
3912 scalar_loop->header, bb))));
3913 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3914 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3915 version_simd_if_cond, zero);
3916 if (cond_expr)
3917 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3918 c, cond_expr);
3919 else
3920 cond_expr = c;
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_NOTE, vect_location,
3923 "created versioning for simd if condition check.\n");
3926 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3927 &gimplify_stmt_list,
3928 is_gimple_condexpr_for_cond, NULL_TREE);
3929 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
3931 /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
3932 invariant in. */
3933 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
3934 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3935 !gsi_end_p (gsi); gsi_next (&gsi))
3937 gimple *stmt = gsi_stmt (gsi);
3938 update_stmt (stmt);
3939 ssa_op_iter iter;
3940 use_operand_p use_p;
3941 basic_block def_bb;
3942 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3943 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3944 && flow_bb_inside_loop_p (outermost, def_bb))
3945 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3948 /* Search for the outermost loop we can version. Avoid versioning of
3949 non-perfect nests but allow if-conversion versioned loops inside. */
3950 class loop *loop_to_version = loop;
3951 if (flow_loop_nested_p (outermost, loop))
3953 if (dump_enabled_p ())
3954 dump_printf_loc (MSG_NOTE, vect_location,
3955 "trying to apply versioning to outer loop %d\n",
3956 outermost->num);
3957 if (outermost->num == 0)
3958 outermost = superloop_at_depth (loop, 1);
3959 /* And avoid applying versioning on non-perfect nests. */
3960 while (loop_to_version != outermost
3961 && (e = single_exit (loop_outer (loop_to_version)))
3962 && !(e->flags & EDGE_COMPLEX)
3963 && (!loop_outer (loop_to_version)->inner->next
3964 || vect_loop_vectorized_call (loop_to_version))
3965 && (!loop_outer (loop_to_version)->inner->next
3966 || !loop_outer (loop_to_version)->inner->next->next))
3967 loop_to_version = loop_outer (loop_to_version);
3970 /* Apply versioning. If there is already a scalar version created by
3971 if-conversion, re-use that. Note we cannot re-use the copy of
3972 an if-converted outer-loop when vectorizing the inner loop only. */
3973 gcond *cond;
3974 if ((!loop_to_version->inner || loop == loop_to_version)
3975 && loop_vectorized_call)
3977 gcc_assert (scalar_loop);
3978 condition_bb = gimple_bb (loop_vectorized_call);
3979 cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
3980 gimple_cond_set_condition_from_tree (cond, cond_expr);
3981 update_stmt (cond);
3983 if (cond_expr_stmt_list)
3985 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
3986 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3987 GSI_SAME_STMT);
3990 /* if-conversion uses profile_probability::always () for both paths,
3991 reset the paths' probabilities appropriately. */
3992 edge te, fe;
3993 extract_true_false_edges_from_block (condition_bb, &te, &fe);
3994 te->probability = prob;
3995 fe->probability = prob.invert ();
3996 /* We can scale the loop counts immediately but have to postpone
3997 scaling the scalar loop because we re-use it during peeling.
3999 Ifcvt duplicates the loop preheader and loop body and produces a basic
4000 block after the loop exit. We need to scale all of that. */
4001 basic_block preheader = loop_preheader_edge (loop_to_version)->src;
4002 preheader->count = preheader->count.apply_probability (prob * prob2);
4003 scale_loop_frequencies (loop_to_version, prob * prob2);
4004 single_exit (loop_to_version)->dest->count = preheader->count;
4005 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
4007 nloop = scalar_loop;
4008 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_NOTE, vect_location,
4010 "reusing %sloop version created by if conversion\n",
4011 loop_to_version != loop ? "outer " : "");
4013 else
4015 if (loop_to_version != loop
4016 && dump_enabled_p ())
4017 dump_printf_loc (MSG_NOTE, vect_location,
4018 "applying loop versioning to outer loop %d\n",
4019 loop_to_version->num);
4021 unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
4023 initialize_original_copy_tables ();
4024 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
4025 prob * prob2, (prob * prob2).invert (),
4026 prob * prob2, (prob * prob2).invert (),
4027 true);
4028 /* We will later insert second conditional so overall outcome of
4029 both is prob * prob2. */
4030 edge true_e, false_e;
4031 extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
4032 true_e->probability = prob;
4033 false_e->probability = prob.invert ();
4034 gcc_assert (nloop);
4035 nloop = get_loop_copy (loop);
4037 /* For cycle vectorization with SLP we rely on the PHI arguments
4038 appearing in the same order as the SLP node operands which for the
4039 loop PHI nodes means the preheader edge dest index needs to remain
4040 the same for the analyzed loop which also becomes the vectorized one.
4041 Make it so in case the state after versioning differs by redirecting
4042 the first edge into the header to the same destination which moves
4043 it last. */
4044 if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
4046 edge e = EDGE_PRED (loop->header, 0);
4047 ssa_redirect_edge (e, e->dest);
4048 flush_pending_stmts (e);
4050 gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
4052 /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
4053 reap those otherwise; they also refer to the original
4054 loops. */
4055 class loop *l = loop;
4056 while (gimple *call = vect_loop_vectorized_call (l))
4058 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
4059 fold_loop_internal_call (call, boolean_false_node);
4060 l = loop_outer (l);
4062 free_original_copy_tables ();
4064 if (cond_expr_stmt_list)
4066 cond_exp_gsi = gsi_last_bb (condition_bb);
4067 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4068 GSI_SAME_STMT);
4071 /* Loop versioning violates an assumption we try to maintain during
4072 vectorization - that the loop exit block has a single predecessor.
4073 After versioning, the exit block of both loop versions is the same
4074 basic block (i.e. it has two predecessors). Just in order to simplify
4075 following transformations in the vectorizer, we fix this situation
4076 here by adding a new (empty) block on the exit-edge of the loop,
4077 with the proper loop-exit phis to maintain loop-closed-form.
4078 If loop versioning wasn't done from loop, but scalar_loop instead,
4079 merge_bb will already have just a single predecessor. */
4081 merge_bb = single_exit (loop_to_version)->dest;
4082 if (EDGE_COUNT (merge_bb->preds) >= 2)
4084 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
4085 new_exit_bb = split_edge (single_exit (loop_to_version));
4086 new_exit_e = single_exit (loop_to_version);
4087 e = EDGE_SUCC (new_exit_bb, 0);
4089 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
4090 gsi_next (&gsi))
4092 tree new_res;
4093 orig_phi = gsi.phi ();
4094 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
4095 new_phi = create_phi_node (new_res, new_exit_bb);
4096 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4097 add_phi_arg (new_phi, arg, new_exit_e,
4098 gimple_phi_arg_location_from_edge (orig_phi, e));
4099 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
4103 update_ssa (TODO_update_ssa_no_phi);
4106 /* Split the cost model check off to a separate BB. Costing assumes
4107 this is the only thing we perform when we enter the scalar loop
4108 from a failed cost decision. */
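/* Roughly, condition_bb changes from
if (cost check && versioning checks) ...
into
if (cost check) { if (versioning checks) ... } else goto scalar loop;
with the cost check ending up in its own basic block.  */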
4109 if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
4111 gimple *def = SSA_NAME_DEF_STMT (cost_name);
4112 gcc_assert (gimple_bb (def) == condition_bb);
4113 /* All uses of the cost check are 'true' after the check we
4114 are going to insert. */
4115 replace_uses_by (cost_name, boolean_true_node);
4116 /* And we're going to build the new single use of it. */
4117 gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
4118 NULL_TREE, NULL_TREE);
4119 edge e = split_block (gimple_bb (def), def);
4120 gimple_stmt_iterator gsi = gsi_for_stmt (def);
4121 gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
4122 edge true_e, false_e;
4123 extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
4124 e->flags &= ~EDGE_FALLTHRU;
4125 e->flags |= EDGE_TRUE_VALUE;
4126 edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
4127 e->probability = prob2;
4128 e2->probability = prob2.invert ();
4129 e->dest->count = e->count ();
4130 set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
4131 auto_vec<basic_block, 3> adj;
4132 for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
4133 son;
4134 son = next_dom_son (CDI_DOMINATORS, son))
4135 if (EDGE_COUNT (son->preds) > 1)
4136 adj.safe_push (son);
4137 for (auto son : adj)
4138 set_immediate_dominator (CDI_DOMINATORS, son, e->src);
4139 //debug_bb (condition_bb);
4140 //debug_bb (e->src);
4143 if (version_niter)
4145 /* The versioned loop could be infinite; we need to clear existing
4146 niter information which is copied from the original loop. */
4147 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
4148 vect_free_loop_info_assumptions (nloop);
4151 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
4152 && dump_enabled_p ())
4154 if (version_alias)
4155 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4156 vect_location,
4157 "loop versioned for vectorization because of "
4158 "possible aliasing\n");
4159 if (version_align)
4160 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4161 vect_location,
4162 "loop versioned for vectorization to enhance "
4163 "alignment\n");
4167 return nloop;