gcc/tree-vect-loop-manip.cc
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "internal-fn.h"
47 #include "stor-layout.h"
48 #include "optabs-query.h"
49 #include "vec-perm-indices.h"
50 #include "insn-config.h"
51 #include "rtl.h"
52 #include "recog.h"
53 #include "langhooks.h"
54 #include "tree-vector-builder.h"
55 #include "optabs-tree.h"
57 /*************************************************************************
58 Simple Loop Peeling Utilities
60 Utilities to support loop peeling for vectorization purposes.
61 *************************************************************************/
64 /* Renames the use *OP_P. */
66 static void
67 rename_use_op (use_operand_p op_p)
69 tree new_name;
71 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
72 return;
74 new_name = get_current_def (USE_FROM_PTR (op_p));
76 /* Something defined outside of the loop. */
77 if (!new_name)
78 return;
80 /* An ordinary ssa name defined in the loop. */
82 SET_USE (op_p, new_name);
86 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
87 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
88 true. */
90 static void
91 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
93 gimple *stmt;
94 use_operand_p use_p;
95 ssa_op_iter iter;
96 edge e;
97 edge_iterator ei;
98 class loop *loop = bb->loop_father;
99 class loop *outer_loop = NULL;
101 if (rename_from_outer_loop)
103 gcc_assert (loop);
104 outer_loop = loop_outer (loop);
107 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
108 gsi_next (&gsi))
110 stmt = gsi_stmt (gsi);
111 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
112 rename_use_op (use_p);
115 FOR_EACH_EDGE (e, ei, bb->preds)
117 if (!flow_bb_inside_loop_p (loop, e->src))
119 if (!rename_from_outer_loop)
120 continue;
121 if (e->src != outer_loop->header)
123 if (outer_loop->inner->next)
125 /* If outer_loop has 2 inner loops, allow there to
126 be an extra basic block which uses LOOP_VECTORIZED to
127 decide which of the two loops to use. */
128 if (!single_pred_p (e->src)
129 || single_pred (e->src) != outer_loop->header)
130 continue;
134 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
135 gsi_next (&gsi))
136 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
141 struct adjust_info
143 tree from, to;
144 basic_block bb;
147 /* A stack of values to be adjusted in debug stmts. We have to
148 process them LIFO, so that the closest substitution applies. If we
149 processed them FIFO, without the stack, we might substitute uses
150 with a PHI DEF that would soon become non-dominant, and when we got
151 to the suitable one, it wouldn't have anything to substitute any
152 more. */
153 static vec<adjust_info, va_heap> adjust_vec;
155 /* Adjust any debug stmts that referenced AI->from values to use the
156 loop-closed AI->to, if the references are dominated by AI->bb and
157 not by the definition of AI->from. */
159 static void
160 adjust_debug_stmts_now (adjust_info *ai)
162 basic_block bbphi = ai->bb;
163 tree orig_def = ai->from;
164 tree new_def = ai->to;
165 imm_use_iterator imm_iter;
166 gimple *stmt;
167 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
169 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
171 /* Adjust any debug stmts that held onto non-loop-closed
172 references. */
173 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
175 use_operand_p use_p;
176 basic_block bbuse;
178 if (!is_gimple_debug (stmt))
179 continue;
181 gcc_assert (gimple_debug_bind_p (stmt));
183 bbuse = gimple_bb (stmt);
185 if ((bbuse == bbphi
186 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
187 && !(bbuse == bbdef
188 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
190 if (new_def)
191 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
192 SET_USE (use_p, new_def);
193 else
195 gimple_debug_bind_reset_value (stmt);
196 update_stmt (stmt);
202 /* Adjust debug stmts as scheduled before. */
204 static void
205 adjust_vec_debug_stmts (void)
207 if (!MAY_HAVE_DEBUG_BIND_STMTS)
208 return;
210 gcc_assert (adjust_vec.exists ());
212 while (!adjust_vec.is_empty ())
214 adjust_debug_stmts_now (&adjust_vec.last ());
215 adjust_vec.pop ();
219 /* Adjust any debug stmts that referenced FROM values to use the
220 loop-closed TO, if the references are dominated by BB and not by
221 the definition of FROM. If adjust_vec is non-NULL, adjustments
222 will be postponed until adjust_vec_debug_stmts is called. */
224 static void
225 adjust_debug_stmts (tree from, tree to, basic_block bb)
227 adjust_info ai;
229 if (MAY_HAVE_DEBUG_BIND_STMTS
230 && TREE_CODE (from) == SSA_NAME
231 && ! SSA_NAME_IS_DEFAULT_DEF (from)
232 && ! virtual_operand_p (from))
234 ai.from = from;
235 ai.to = to;
236 ai.bb = bb;
238 if (adjust_vec.exists ())
239 adjust_vec.safe_push (ai);
240 else
241 adjust_debug_stmts_now (&ai);
245 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
246 to adjust any debug stmts that referenced the old phi arg,
247 presumably non-loop-closed references left over from other
248 transformations. */
250 static void
251 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
253 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
255 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
257 if (MAY_HAVE_DEBUG_BIND_STMTS)
258 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
259 gimple_bb (update_phi));
262 /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
263 that the control should have during the first iteration and NEXT_CTRL is the
264 value that it should have on subsequent iterations. */
266 static void
267 vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
268 tree next_ctrl)
270 gphi *phi = create_phi_node (ctrl, loop->header);
271 add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
272 add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
275 /* Add SEQ to the end of LOOP's preheader block. */
277 static void
278 add_preheader_seq (class loop *loop, gimple_seq seq)
280 if (seq)
282 edge pe = loop_preheader_edge (loop);
283 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
284 gcc_assert (!new_bb);
288 /* Add SEQ to the beginning of LOOP's header block. */
290 static void
291 add_header_seq (class loop *loop, gimple_seq seq)
293 if (seq)
295 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
296 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
300 /* Return true if the target can interleave elements of two vectors.
301 OFFSET is 0 if the first half of the vectors should be interleaved
302 or 1 if the second half should. When returning true, store the
303 associated permutation in INDICES. */
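/* For instance (illustration only): with 4-element vectors the permutation
   built below is { 0, 4, 1, 5 } for OFFSET 0 and { 2, 6, 3, 7 } for OFFSET 1,
   i.e. the usual low/high interleave of the two input vectors.  */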
305 static bool
306 interleave_supported_p (vec_perm_indices *indices, tree vectype,
307 unsigned int offset)
309 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
310 poly_uint64 base = exact_div (nelts, 2) * offset;
311 vec_perm_builder sel (nelts, 2, 3);
312 for (unsigned int i = 0; i < 3; ++i)
314 sel.quick_push (base + i);
315 sel.quick_push (base + i + nelts);
317 indices->new_vector (sel, 2, nelts);
318 return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
319 *indices);
322 /* Try to use permutes to define the masks in DEST_RGM using the masks
323 in SRC_RGM, given that the former has twice as many masks as the
324 latter. Return true on success, adding any new statements to SEQ. */
326 static bool
327 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
328 rgroup_controls *src_rgm)
330 tree src_masktype = src_rgm->type;
331 tree dest_masktype = dest_rgm->type;
332 machine_mode src_mode = TYPE_MODE (src_masktype);
333 insn_code icode1, icode2;
334 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
335 && (icode1 = optab_handler (vec_unpacku_hi_optab,
336 src_mode)) != CODE_FOR_nothing
337 && (icode2 = optab_handler (vec_unpacku_lo_optab,
338 src_mode)) != CODE_FOR_nothing)
340 /* Unpacking the source masks gives at least as many mask bits as
341 we need. We can then VIEW_CONVERT any excess bits away. */
342 machine_mode dest_mode = insn_data[icode1].operand[0].mode;
343 gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
344 tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
345 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
347 tree src = src_rgm->controls[i / 2];
348 tree dest = dest_rgm->controls[i];
349 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
350 ? VEC_UNPACK_HI_EXPR
351 : VEC_UNPACK_LO_EXPR);
352 gassign *stmt;
353 if (dest_masktype == unpack_masktype)
354 stmt = gimple_build_assign (dest, code, src);
355 else
357 tree temp = make_ssa_name (unpack_masktype);
358 stmt = gimple_build_assign (temp, code, src);
359 gimple_seq_add_stmt (seq, stmt);
360 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
361 build1 (VIEW_CONVERT_EXPR,
362 dest_masktype, temp));
364 gimple_seq_add_stmt (seq, stmt);
366 return true;
368 vec_perm_indices indices[2];
369 if (dest_masktype == src_masktype
370 && interleave_supported_p (&indices[0], src_masktype, 0)
371 && interleave_supported_p (&indices[1], src_masktype, 1))
373 /* The destination requires twice as many mask bits as the source, so
374 we can use interleaving permutes to double up the number of bits. */
375 tree masks[2];
376 for (unsigned int i = 0; i < 2; ++i)
377 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
378 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
380 tree src = src_rgm->controls[i / 2];
381 tree dest = dest_rgm->controls[i];
382 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
383 src, src, masks[i & 1]);
384 gimple_seq_add_stmt (seq, stmt);
386 return true;
388 return false;
391 /* Populate DEST_RGM->controls, given that they should add up to STEP.
393 STEP = MIN_EXPR <ivtmp_34, VF>;
395 First length (MIN (X, VF/N)):
396 loop_len_15 = MIN_EXPR <STEP, VF/N>;
398 Second length:
399 tmp = STEP - loop_len_15;
400 loop_len_16 = MIN (tmp, VF/N);
402 Third length:
403 tmp2 = tmp - loop_len_16;
404 loop_len_17 = MIN (tmp2, VF/N);
406 Last length:
407 loop_len_18 = tmp2 - loop_len_17;
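/* As a worked illustration (numbers chosen for this example only): with
   VF/N = 4 and STEP = 10 the four lengths above become MIN (10, 4) = 4,
   then MIN (10 - 4, 4) = 4, then MIN (6 - 4, 4) = 2, and finally 2 - 2 = 0,
   which sum to STEP as required.  */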
410 static void
411 vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
412 rgroup_controls *dest_rgm, tree step)
414 tree ctrl_type = dest_rgm->type;
415 poly_uint64 nitems_per_ctrl
416 = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
417 tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
419 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
421 tree ctrl = dest_rgm->controls[i];
422 if (i == 0)
424 /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */
425 gassign *assign
426 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
427 gimple_seq_add_stmt (seq, assign);
429 else if (i == dest_rgm->controls.length () - 1)
431 /* Last iteration: the remainder, capped to the range [0, VF/N]. */
432 gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
433 dest_rgm->controls[i - 1]);
434 gimple_seq_add_stmt (seq, assign);
436 else
438 /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */
439 step = gimple_build (seq, MINUS_EXPR, iv_type, step,
440 dest_rgm->controls[i - 1]);
441 gassign *assign
442 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
443 gimple_seq_add_stmt (seq, assign);
448 /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
449 for all the rgroup controls in RGC and return a control that is nonzero
450 when the loop needs to iterate. Add any new preheader statements to
451 PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
453 RGC belongs to loop LOOP. The loop originally iterated NITERS
454 times and has been vectorized according to LOOP_VINFO.
456 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
457 starts with NITERS_SKIP dummy iterations of the scalar loop before
458 the real work starts. The mask elements for these dummy iterations
459 must be 0, to ensure that the extra iterations do not have an effect.
461 It is known that:
463 NITERS * RGC->max_nscalars_per_iter * RGC->factor
465 does not overflow. However, MIGHT_WRAP_P says whether an induction
466 variable that starts at 0 and has step:
468 VF * RGC->max_nscalars_per_iter * RGC->factor
470 might overflow before hitting a value above:
472 (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
474 This means that we cannot guarantee that such an induction variable
475 would ever hit a value that produces a set of all-false masks or zero
476 lengths for RGC.
478 Note: the cost of the code generated by this function is modeled
479 by vect_estimate_min_profitable_iters, so changes here may need
480 corresponding changes there. */
482 static tree
483 vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
484 gimple_seq *preheader_seq,
485 gimple_seq *header_seq,
486 gimple_stmt_iterator loop_cond_gsi,
487 rgroup_controls *rgc, tree niters,
488 tree niters_skip, bool might_wrap_p,
489 tree *iv_step, tree *compare_step)
491 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
492 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
493 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
495 tree ctrl_type = rgc->type;
496 unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
497 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
498 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
499 tree length_limit = NULL_TREE;
500 /* For lengths, we need length_limit to keep each length in range. */
501 if (!use_masks_p)
502 length_limit = build_int_cst (compare_type, nitems_per_ctrl);
504 /* Calculate the maximum number of item values that the rgroup
505 handles in total, the number that it handles for each iteration
506 of the vector loop, and the number that it should skip during the
507 first iteration of the vector loop. */
508 tree nitems_total = niters;
509 tree nitems_step = build_int_cst (iv_type, vf);
510 tree nitems_skip = niters_skip;
511 if (nitems_per_iter != 1)
513 /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
514 these multiplications don't overflow. */
515 tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
516 tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
517 nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
518 nitems_total, compare_factor);
519 nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
520 nitems_step, iv_factor);
521 if (nitems_skip)
522 nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
523 nitems_skip, compare_factor);
526 /* Create an induction variable that counts the number of items
527 processed. */
528 tree index_before_incr, index_after_incr;
529 gimple_stmt_iterator incr_gsi;
530 bool insert_after;
531 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
532 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
534 /* Create an IV that counts down from niters_total and whose step
535 is the (variable) amount processed in the current iteration:
537 _10 = (unsigned long) count_12(D);
539 # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
540 _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
542 vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
544 ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
546 if (ivtmp_9 > POLY_INT_CST [4, 4])
547 goto <bb 4>; [83.33%]
548 else
549 goto <bb 5>; [16.67%]
551 nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
552 tree step = rgc->controls.length () == 1 ? rgc->controls[0]
553 : make_ssa_name (iv_type);
554 /* Create decrement IV. */
555 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
557 create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
558 insert_after, &index_before_incr, &index_after_incr);
559 tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
560 index_before_incr, nitems_step);
561 gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
563 else
565 create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
566 &incr_gsi, insert_after, &index_before_incr,
567 &index_after_incr);
568 gimple_seq_add_stmt (header_seq,
569 gimple_build_assign (step, MIN_EXPR,
570 index_before_incr,
571 nitems_step));
573 *iv_step = step;
574 *compare_step = nitems_step;
575 return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
576 : index_before_incr;
579 /* Create increment IV. */
580 create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
581 loop, &incr_gsi, insert_after, &index_before_incr,
582 &index_after_incr);
584 tree zero_index = build_int_cst (compare_type, 0);
585 tree test_index, test_limit, first_limit;
586 gimple_stmt_iterator *test_gsi;
587 if (might_wrap_p)
589 /* In principle the loop should stop iterating once the incremented
590 IV reaches a value greater than or equal to:
592 NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
594 However, there's no guarantee that this addition doesn't overflow
595 the comparison type, or that the IV hits a value above it before
596 wrapping around. We therefore adjust the limit down by one
597 IV step:
599 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
600 -[infinite-prec] NITEMS_STEP
602 and compare the IV against this limit _before_ incrementing it.
603 Since the comparison type is unsigned, we actually want the
604 subtraction to saturate at zero:
606 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
607 -[sat] NITEMS_STEP
609 And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
611 NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
613 where the rightmost subtraction can be done directly in
614 COMPARE_TYPE. */
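/* For example (illustrative values only): with NITEMS_TOTAL = 10,
   NITEMS_SKIP = 2 and NITEMS_STEP = 8 the infinite-precision limit is
   (10 + 2) - 8 = 4, and the reassociated saturating form gives the
   same value: 10 -[sat] (8 - 2) = 4.  */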
615 test_index = index_before_incr;
616 tree adjust = gimple_convert (preheader_seq, compare_type,
617 nitems_step);
618 if (nitems_skip)
619 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
620 adjust, nitems_skip);
621 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
622 nitems_total, adjust);
623 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
624 test_limit, adjust);
625 test_gsi = &incr_gsi;
627 /* Get a safe limit for the first iteration. */
628 if (nitems_skip)
630 /* The first vector iteration can handle at most NITEMS_STEP
631 items. NITEMS_STEP <= CONST_LIMIT, and adding
632 NITEMS_SKIP to that cannot overflow. */
633 tree const_limit = build_int_cst (compare_type,
634 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
635 * nitems_per_iter);
636 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
637 nitems_total, const_limit);
638 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
639 first_limit, nitems_skip);
641 else
642 /* For the first iteration it doesn't matter whether the IV hits
643 a value above NITEMS_TOTAL. That only matters for the latch
644 condition. */
645 first_limit = nitems_total;
647 else
649 /* Test the incremented IV, which will always hit a value above
650 the bound before wrapping. */
651 test_index = index_after_incr;
652 test_limit = nitems_total;
653 if (nitems_skip)
654 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
655 test_limit, nitems_skip);
656 test_gsi = &loop_cond_gsi;
658 first_limit = test_limit;
661 /* Convert the IV value to the comparison type (either a no-op or
662 a demotion). */
663 gimple_seq test_seq = NULL;
664 test_index = gimple_convert (&test_seq, compare_type, test_index);
665 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
667 /* Provide a definition of each control in the group. */
668 tree next_ctrl = NULL_TREE;
669 tree ctrl;
670 unsigned int i;
671 FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
673 /* Previous controls will cover BIAS items. This control covers the
674 next batch. */
675 poly_uint64 bias = nitems_per_ctrl * i;
676 tree bias_tree = build_int_cst (compare_type, bias);
678 /* See whether the first iteration of the vector loop is known
679 to have a full control. */
680 poly_uint64 const_limit;
681 bool first_iteration_full
682 = (poly_int_tree_p (first_limit, &const_limit)
683 && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
685 /* Rather than have a new IV that starts at BIAS and goes up to
686 TEST_LIMIT, prefer to use the same 0-based IV for each control
687 and adjust the bound down by BIAS. */
688 tree this_test_limit = test_limit;
689 if (i != 0)
691 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
692 compare_type, this_test_limit,
693 bias_tree);
694 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
695 compare_type, this_test_limit,
696 bias_tree);
699 /* Create the initial control. First include all items that
700 are within the loop limit. */
701 tree init_ctrl = NULL_TREE;
702 if (!first_iteration_full)
704 tree start, end;
705 if (first_limit == test_limit)
707 /* Use a natural test between zero (the initial IV value)
708 and the loop limit. The "else" block would be valid too,
709 but this choice can avoid the need to load BIAS_TREE into
710 a register. */
711 start = zero_index;
712 end = this_test_limit;
714 else
716 /* FIRST_LIMIT is the maximum number of items handled by the
717 first iteration of the vector loop. Test the portion
718 associated with this control. */
719 start = bias_tree;
720 end = first_limit;
723 if (use_masks_p)
724 init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
725 start, end, "max_mask");
726 else
728 init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
729 gimple_seq seq = vect_gen_len (init_ctrl, start,
730 end, length_limit);
731 gimple_seq_add_seq (preheader_seq, seq);
735 /* Now AND out the bits that are within the number of skipped
736 items. */
737 poly_uint64 const_skip;
738 if (nitems_skip
739 && !(poly_int_tree_p (nitems_skip, &const_skip)
740 && known_le (const_skip, bias)))
742 gcc_assert (use_masks_p);
743 tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
744 bias_tree, nitems_skip);
745 if (init_ctrl)
746 init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
747 init_ctrl, unskipped_mask);
748 else
749 init_ctrl = unskipped_mask;
752 if (!init_ctrl)
754 /* First iteration is full. */
755 if (use_masks_p)
756 init_ctrl = build_minus_one_cst (ctrl_type);
757 else
758 init_ctrl = length_limit;
761 /* Get the control value for the next iteration of the loop. */
762 if (use_masks_p)
764 gimple_seq stmts = NULL;
765 next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
766 this_test_limit, "next_mask");
767 gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
769 else
771 next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
772 gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
773 length_limit);
774 gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
777 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
780 int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
781 if (partial_load_bias != 0)
783 tree adjusted_len = rgc->bias_adjusted_ctrl;
784 gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
785 rgc->controls[0],
786 build_int_cst
787 (TREE_TYPE (rgc->controls[0]),
788 partial_load_bias));
789 gimple_seq_add_stmt (header_seq, minus);
792 return next_ctrl;
795 /* Set up the iteration condition and rgroup controls for LOOP, given
796 that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
797 loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
798 the number of iterations of the original scalar loop that should be
799 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
800 for vect_set_loop_condition.
802 Insert the branch-back condition before LOOP_COND_GSI and return the
803 final gcond. */
805 static gcond *
806 vect_set_loop_condition_partial_vectors (class loop *loop,
807 loop_vec_info loop_vinfo, tree niters,
808 tree final_iv, bool niters_maybe_zero,
809 gimple_stmt_iterator loop_cond_gsi)
811 gimple_seq preheader_seq = NULL;
812 gimple_seq header_seq = NULL;
814 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
815 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
816 unsigned int compare_precision = TYPE_PRECISION (compare_type);
817 tree orig_niters = niters;
819 /* Type of the initial value of NITERS. */
820 tree ni_actual_type = TREE_TYPE (niters);
821 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
822 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
823 if (niters_skip)
824 niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
826 /* Convert NITERS to the same size as the compare. */
827 if (compare_precision > ni_actual_precision
828 && niters_maybe_zero)
830 /* We know that there is always at least one iteration, so if the
831 count is zero then it must have wrapped. Cope with this by
832 subtracting 1 before the conversion and adding 1 to the result. */
833 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
834 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
835 niters, build_minus_one_cst (ni_actual_type));
836 niters = gimple_convert (&preheader_seq, compare_type, niters);
837 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
838 niters, build_one_cst (compare_type));
840 else
841 niters = gimple_convert (&preheader_seq, compare_type, niters);
843 /* Iterate over all the rgroups and fill in their controls. We could use
844 the first control from any rgroup for the loop condition; here we
845 arbitrarily pick the last. */
846 tree test_ctrl = NULL_TREE;
847 tree iv_step = NULL_TREE;
848 tree compare_step = NULL_TREE;
849 rgroup_controls *rgc;
850 rgroup_controls *iv_rgc = nullptr;
851 unsigned int i;
852 auto_vec<rgroup_controls> *controls = use_masks_p
853 ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
854 : &LOOP_VINFO_LENS (loop_vinfo);
855 FOR_EACH_VEC_ELT (*controls, i, rgc)
856 if (!rgc->controls.is_empty ())
858 /* First try using permutes. This adds a single vector
859 instruction to the loop for each mask, but needs no extra
860 loop invariants or IVs. */
861 unsigned int nmasks = i + 1;
862 if (use_masks_p && (nmasks & 1) == 0)
864 rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
865 if (!half_rgc->controls.is_empty ()
866 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
867 continue;
870 if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
871 || !iv_rgc
872 || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
873 != rgc->max_nscalars_per_iter * rgc->factor))
875 /* See whether a zero-based IV would ever generate all-false masks
876 or zero length before wrapping around. */
877 bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
879 /* Set up all controls for this group. */
880 test_ctrl
881 = vect_set_loop_controls_directly (loop, loop_vinfo,
882 &preheader_seq, &header_seq,
883 loop_cond_gsi, rgc, niters,
884 niters_skip, might_wrap_p,
885 &iv_step, &compare_step);
887 iv_rgc = rgc;
890 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
891 && rgc->controls.length () > 1)
893 /* vect_set_loop_controls_directly creates an IV whose step
894 is equal to the expected sum of RGC->controls. Use that
895 information to populate RGC->controls. */
896 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
897 gcc_assert (iv_step);
898 vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
902 /* Emit all accumulated statements. */
903 add_preheader_seq (loop, preheader_seq);
904 add_header_seq (loop, header_seq);
906 /* Get a boolean result that tells us whether to iterate. */
907 edge exit_edge = single_exit (loop);
908 gcond *cond_stmt;
909 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
910 && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
912 gcc_assert (compare_step);
913 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
914 cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
915 NULL_TREE);
917 else
919 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
920 tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
921 cond_stmt
922 = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
924 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
926 /* The loop iterates (NITERS - 1) / VF + 1 times.
927 Subtract one from this to get the latch count. */
928 tree step = build_int_cst (compare_type,
929 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
930 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
931 build_minus_one_cst (compare_type));
932 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
933 niters_minus_one, step);
935 if (final_iv)
937 gassign *assign = gimple_build_assign (final_iv, orig_niters);
938 gsi_insert_on_edge_immediate (single_exit (loop), assign);
941 return cond_stmt;
944 /* Set up the iteration condition and rgroup controls for LOOP in AVX512
945 style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
946 vectorized loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
947 the number of iterations of the original scalar loop that should be
948 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
949 for vect_set_loop_condition.
951 Insert the branch-back condition before LOOP_COND_GSI and return the
952 final gcond. */
954 static gcond *
955 vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
956 loop_vec_info loop_vinfo, tree niters,
957 tree final_iv,
958 bool niters_maybe_zero,
959 gimple_stmt_iterator loop_cond_gsi)
961 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
962 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
963 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
964 tree orig_niters = niters;
965 gimple_seq preheader_seq = NULL;
967 /* Create an IV that counts down from niters and whose step
968 is the number of iterations processed in the current iteration.
969 Produce the controls with compares like the following.
971 # iv_2 = PHI <niters, iv_3>
972 rem_4 = MIN <iv_2, VF>;
973 remv_6 = { rem_4, rem_4, rem_4, ... }
974 mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6;
975 iv_3 = iv_2 - VF;
976 if (iv_2 > VF)
977 continue;
979 Where the constant is built with elements at most VF - 1 and
980 repetitions according to max_nscalars_per_iter which is guaranteed
981 to be the same within a group. */
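/* As a concrete illustration (values chosen for this example only): with
   VF = 4 and max_nscalars_per_iter = 2 the constant is
   { 0, 0, 1, 1, 2, 2, 3, 3 }, so if 3 scalar iterations remain the
   compare yields the mask { 1, 1, 1, 1, 1, 1, 0, 0 }.  */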
983 /* Convert NITERS to the determined IV type. */
984 if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
985 && niters_maybe_zero)
987 /* We know that there is always at least one iteration, so if the
988 count is zero then it must have wrapped. Cope with this by
989 subtracting 1 before the conversion and adding 1 to the result. */
990 gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
991 niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
992 niters, build_minus_one_cst (TREE_TYPE (niters)));
993 niters = gimple_convert (&preheader_seq, iv_type, niters);
994 niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
995 niters, build_one_cst (iv_type));
997 else
998 niters = gimple_convert (&preheader_seq, iv_type, niters);
1000 /* Bias the initial value of the IV in case we need to skip iterations
1001 at the beginning. */
1002 tree niters_adj = niters;
1003 if (niters_skip)
1005 tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
1006 niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
1007 iv_type, niters, skip);
1010 /* The iteration step is the vectorization factor. */
1011 tree iv_step = build_int_cst (iv_type, vf);
1013 /* Create the decrement IV. */
1014 tree index_before_incr, index_after_incr;
1015 gimple_stmt_iterator incr_gsi;
1016 bool insert_after;
1017 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1018 create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
1019 &incr_gsi, insert_after, &index_before_incr,
1020 &index_after_incr);
1022 /* Iterate over all the rgroups and fill in their controls. */
1023 for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
1025 if (rgc.controls.is_empty ())
1026 continue;
1028 tree ctrl_type = rgc.type;
1029 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
1031 tree vectype = rgc.compare_type;
1033 /* index_after_incr is the IV specifying the remaining iterations in
1034 the next iteration. */
1035 tree rem = index_after_incr;
1036 /* When the data type for the compare to produce the mask is
1037 smaller than the IV type we need to saturate. Saturate to
1038 the smallest possible value (IV_TYPE) so we only have to
1039 saturate once (CSE will catch redundant ones we add). */
1040 if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
1041 rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1042 UNKNOWN_LOCATION,
1043 MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
1044 rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
1045 UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
1047 /* Build a data vector composed of the remaining iterations. */
1048 rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
1049 UNKNOWN_LOCATION, vectype, rem);
1051 /* Provide a definition of each vector in the control group. */
1052 tree next_ctrl = NULL_TREE;
1053 tree first_rem = NULL_TREE;
1054 tree ctrl;
1055 unsigned int i;
1056 FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
1058 /* Previous controls will cover BIAS items. This control covers the
1059 next batch. */
1060 poly_uint64 bias = nitems_per_ctrl * i;
1062 /* Build the constant to compare the remaining iters against,
1063 this is something like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
1064 split into pieces. */
1065 unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
1066 tree_vector_builder builder (vectype, n, 1);
1067 for (unsigned i = 0; i < n; ++i)
1069 unsigned HOST_WIDE_INT val
1070 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
1071 gcc_assert (val < vf.to_constant ());
1072 builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
1074 tree cmp_series = builder.build ();
1076 /* Create the initial control. First include all items that
1077 are within the loop limit. */
1078 tree init_ctrl = NULL_TREE;
1079 poly_uint64 const_limit;
1080 /* See whether the first iteration of the vector loop is known
1081 to have a full control. */
1082 if (poly_int_tree_p (niters, &const_limit)
1083 && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
1084 init_ctrl = build_minus_one_cst (ctrl_type);
1085 else
1087 /* The remaining work items initially are niters. Saturate,
1088 splat and compare. */
1089 if (!first_rem)
1091 first_rem = niters;
1092 if (TYPE_PRECISION (TREE_TYPE (vectype))
1093 < TYPE_PRECISION (iv_type))
1094 first_rem = gimple_build (&preheader_seq,
1095 MIN_EXPR, TREE_TYPE (first_rem),
1096 first_rem, iv_step);
1097 first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
1098 first_rem);
1099 first_rem = gimple_build_vector_from_val (&preheader_seq,
1100 vectype, first_rem);
1102 init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
1103 cmp_series, first_rem);
1106 /* Now AND out the bits that are within the number of skipped
1107 items. */
1108 poly_uint64 const_skip;
1109 if (niters_skip
1110 && !(poly_int_tree_p (niters_skip, &const_skip)
1111 && known_le (const_skip, bias)))
1113 /* For integer mode masks it's cheaper to shift out the bits
1114 since that avoids loading a constant. */
1115 gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
1116 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1117 lang_hooks.types.type_for_mode
1118 (TYPE_MODE (ctrl_type), 1),
1119 init_ctrl);
1120 /* ??? But when the shift amount isn't constant this requires
1121 a round-trip to GPRs. We could apply the bias to either
1122 side of the compare instead. */
1123 tree shift = gimple_build (&preheader_seq, MULT_EXPR,
1124 TREE_TYPE (niters_skip), niters_skip,
1125 build_int_cst (TREE_TYPE (niters_skip),
1126 rgc.max_nscalars_per_iter));
1127 init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
1128 TREE_TYPE (init_ctrl),
1129 init_ctrl, shift);
1130 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1131 ctrl_type, init_ctrl);
1134 /* Get the control value for the next iteration of the loop. */
1135 next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1136 UNKNOWN_LOCATION,
1137 LT_EXPR, ctrl_type, cmp_series, rem);
1139 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
1143 /* Emit all accumulated statements. */
1144 add_preheader_seq (loop, preheader_seq);
1146 /* Adjust the exit test using the decrementing IV. */
1147 edge exit_edge = single_exit (loop);
1148 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
1149 /* When we peel for alignment with niter_skip != 0 this can
1150 cause niter + niter_skip to wrap and, since we are comparing the
1151 value before the decrement here, we would get a false early exit.
1152 We can't compare the value after the decrement either because that
1153 decrement could wrap as well, as we're not doing a saturating
1154 decrement. To avoid this situation we force a larger
1155 iv_type. */
1156 gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
1157 NULL_TREE, NULL_TREE);
1158 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1160 /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
1161 Subtract one from this to get the latch count. */
1162 tree niters_minus_one
1163 = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
1164 build_minus_one_cst (TREE_TYPE (orig_niters)));
1165 tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
1166 if (niters_skip)
1167 niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
1168 fold_convert (iv_type, niters_skip));
1169 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
1170 niters_adj2, iv_step);
1172 if (final_iv)
1174 gassign *assign = gimple_build_assign (final_iv, orig_niters);
1175 gsi_insert_on_edge_immediate (single_exit (loop), assign);
1178 return cond_stmt;
1182 /* Like vect_set_loop_condition, but handle the case in which the vector
1183 loop handles exactly VF scalars per iteration. */
1185 static gcond *
1186 vect_set_loop_condition_normal (class loop *loop, tree niters, tree step,
1187 tree final_iv, bool niters_maybe_zero,
1188 gimple_stmt_iterator loop_cond_gsi)
1190 tree indx_before_incr, indx_after_incr;
1191 gcond *cond_stmt;
1192 gcond *orig_cond;
1193 edge pe = loop_preheader_edge (loop);
1194 edge exit_edge = single_exit (loop);
1195 gimple_stmt_iterator incr_gsi;
1196 bool insert_after;
1197 enum tree_code code;
1198 tree niters_type = TREE_TYPE (niters);
1200 orig_cond = get_loop_exit_condition (loop);
1201 gcc_assert (orig_cond);
1202 loop_cond_gsi = gsi_for_stmt (orig_cond);
1204 tree init, limit;
1205 if (!niters_maybe_zero && integer_onep (step))
1207 /* In this case we can use a simple 0-based IV:
1210 x = 0;
1214 x += 1;
1216 while (x < NITERS); */
1217 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1218 init = build_zero_cst (niters_type);
1219 limit = niters;
1221 else
1223 /* The following works for all values of NITERS except 0:
1226 x = 0;
1230 x += STEP;
1232 while (x <= NITERS - STEP);
1234 so that the loop continues to iterate if x + STEP - 1 < NITERS
1235 but stops if x + STEP - 1 >= NITERS.
1237 However, if NITERS is zero, x never hits a value above NITERS - STEP
1238 before wrapping around. There are two obvious ways of dealing with
1239 this:
1241 - start at STEP - 1 and compare x before incrementing it
1242 - start at -1 and compare x after incrementing it
1244 The latter is simpler and is what we use. The loop in this case
1245 looks like:
1248 x = -1;
1252 x += STEP;
1254 while (x < NITERS - STEP);
1256 In both cases the loop limit is NITERS - STEP. */
1257 gimple_seq seq = NULL;
1258 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
1259 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
1260 if (seq)
1262 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1263 gcc_assert (!new_bb);
1265 if (niters_maybe_zero)
1267 /* Case C. */
1268 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1269 init = build_all_ones_cst (niters_type);
1271 else
1273 /* Case B. */
1274 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
1275 init = build_zero_cst (niters_type);
1279 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1280 create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
1281 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
1282 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
1283 true, NULL_TREE, true,
1284 GSI_SAME_STMT);
1285 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
1286 true, GSI_SAME_STMT);
1288 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
1289 NULL_TREE);
1291 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1293 /* Record the number of latch iterations. */
1294 if (limit == niters)
1295 /* Case A: the loop iterates NITERS times. Subtract one to get the
1296 latch count. */
1297 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
1298 build_int_cst (niters_type, 1));
1299 else
1300 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
1301 Subtract one from this to get the latch count. */
1302 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
1303 limit, step);
1305 if (final_iv)
1307 gassign *assign;
1308 edge exit = single_exit (loop);
1309 gcc_assert (single_pred_p (exit->dest));
1310 tree phi_dest
1311 = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
1312 /* Make sure to maintain LC SSA form here and elide the subtraction
1313 if the value is zero. */
1314 gphi *phi = create_phi_node (phi_dest, exit->dest);
1315 add_phi_arg (phi, indx_after_incr, exit, UNKNOWN_LOCATION);
1316 if (!integer_zerop (init))
1318 assign = gimple_build_assign (final_iv, MINUS_EXPR,
1319 phi_dest, init);
1320 gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
1321 gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
1325 return cond_stmt;
1328 /* If we're using fully-masked loops, make LOOP iterate:
1330 N == (NITERS - 1) / STEP + 1
1332 times. When NITERS is zero, this is equivalent to making the loop
1333 execute (1 << M) / STEP times, where M is the precision of NITERS.
1334 NITERS_MAYBE_ZERO is true if this last case might occur.
1336 If we're not using fully-masked loops, make LOOP iterate:
1338 N == (NITERS - STEP) / STEP + 1
1340 times, where NITERS is known to be outside the range [1, STEP - 1].
1341 This is equivalent to making the loop execute NITERS / STEP times
1342 when NITERS is nonzero and (1 << M) / STEP times otherwise.
1343 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
1345 If FINAL_IV is nonnull, it is an SSA name that should be set to
1346 N * STEP on exit from the loop.
1348 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
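/* For example (values for illustration only): with STEP = 4, a fully-masked
   loop with NITERS = 10 runs (10 - 1) / 4 + 1 = 3 times, and a non-masked
   loop with NITERS = 12 runs (12 - 4) / 4 + 1 = 3 times.  */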
1350 void
1351 vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
1352 tree niters, tree step, tree final_iv,
1353 bool niters_maybe_zero)
1355 gcond *cond_stmt;
1356 gcond *orig_cond = get_loop_exit_condition (loop);
1357 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
1359 if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1361 if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
1362 cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_vinfo,
1363 niters, final_iv,
1364 niters_maybe_zero,
1365 loop_cond_gsi);
1366 else
1367 cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo,
1368 niters, final_iv,
1369 niters_maybe_zero,
1370 loop_cond_gsi);
1372 else
1373 cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv,
1374 niters_maybe_zero,
1375 loop_cond_gsi);
1377 /* Remove old loop exit test. */
1378 stmt_vec_info orig_cond_info;
1379 if (loop_vinfo
1380 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
1381 loop_vinfo->remove_stmt (orig_cond_info);
1382 else
1383 gsi_remove (&loop_cond_gsi, true);
1385 if (dump_enabled_p ())
1386 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
1387 (gimple *) cond_stmt);
1390 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
1391 For all PHI arguments in FROM->dest and TO->dest from those
1392 edges, ensure that the TO->dest PHI arguments have their current_def
1393 set to that of the corresponding argument in FROM. */
1395 static void
1396 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
1398 gimple_stmt_iterator gsi_from, gsi_to;
1400 for (gsi_from = gsi_start_phis (from->dest),
1401 gsi_to = gsi_start_phis (to->dest);
1402 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
1404 gimple *from_phi = gsi_stmt (gsi_from);
1405 gimple *to_phi = gsi_stmt (gsi_to);
1406 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
1407 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
1408 if (virtual_operand_p (from_arg))
1410 gsi_next (&gsi_from);
1411 continue;
1413 if (virtual_operand_p (to_arg))
1415 gsi_next (&gsi_to);
1416 continue;
1418 if (TREE_CODE (from_arg) != SSA_NAME)
1419 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
1420 else if (TREE_CODE (to_arg) == SSA_NAME
1421 && from_arg != to_arg)
1423 if (get_current_def (to_arg) == NULL_TREE)
1425 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
1426 TREE_TYPE (get_current_def
1427 (from_arg))));
1428 set_current_def (to_arg, get_current_def (from_arg));
1431 gsi_next (&gsi_from);
1432 gsi_next (&gsi_to);
1435 gphi *from_phi = get_virtual_phi (from->dest);
1436 gphi *to_phi = get_virtual_phi (to->dest);
1437 if (from_phi)
1438 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
1439 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
1443 /* Given LOOP this function generates a new copy of it and puts it
1444 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1445 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1446 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1447 entry or exit of LOOP. */
1449 class loop *
1450 slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
1451 class loop *scalar_loop, edge e)
1453 class loop *new_loop;
1454 basic_block *new_bbs, *bbs, *pbbs;
1455 bool at_exit;
1456 bool was_imm_dom;
1457 basic_block exit_dest;
1458 edge exit, new_exit;
1459 bool duplicate_outer_loop = false;
1461 exit = single_exit (loop);
1462 at_exit = (e == exit);
1463 if (!at_exit && e != loop_preheader_edge (loop))
1464 return NULL;
1466 if (scalar_loop == NULL)
1467 scalar_loop = loop;
1469 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1470 pbbs = bbs + 1;
1471 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1472 /* Allow duplication of outer loops. */
1473 if (scalar_loop->inner)
1474 duplicate_outer_loop = true;
1476 /* Generate new loop structure. */
1477 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1478 duplicate_subloops (scalar_loop, new_loop);
1480 exit_dest = exit->dest;
1481 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1482 exit_dest) == loop->header ?
1483 true : false);
1485 /* Also copy the pre-header; this avoids jumping through hoops to
1486 duplicate the loop entry PHI arguments. Create an empty
1487 pre-header unconditionally for this. */
1488 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1489 edge entry_e = single_pred_edge (preheader);
1490 bbs[0] = preheader;
1491 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1493 exit = single_exit (scalar_loop);
1494 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1495 &exit, 1, &new_exit, NULL,
1496 at_exit ? loop->latch : e->src, true);
1497 exit = single_exit (loop);
1498 basic_block new_preheader = new_bbs[0];
1500 /* Before installing PHI arguments make sure that the edges
1501 into them match that of the scalar loop we analyzed. This
1502 makes sure the SLP tree matches up between the main vectorized
1503 loop and the epilogue vectorized copies. */
1504 if (single_succ_edge (preheader)->dest_idx
1505 != single_succ_edge (new_bbs[0])->dest_idx)
1507 basic_block swap_bb = new_bbs[1];
1508 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1509 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1510 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1511 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1513 if (duplicate_outer_loop)
1515 class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
1516 if (loop_preheader_edge (scalar_loop)->dest_idx
1517 != loop_preheader_edge (new_inner_loop)->dest_idx)
1519 basic_block swap_bb = new_inner_loop->header;
1520 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1521 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1522 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1523 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1527 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1529 /* Skip new preheader since it's deleted if copy loop is added at entry. */
1530 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1531 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1533 if (scalar_loop != loop)
1535 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1536 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1537 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1538 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1539 header) to have current_def set, so copy them over. */
1540 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
1541 exit);
1542 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1544 EDGE_SUCC (loop->latch, 0));
1547 if (at_exit) /* Add the loop copy at exit. */
1549 if (scalar_loop != loop)
1551 gphi_iterator gsi;
1552 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1554 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
1555 gsi_next (&gsi))
1557 gphi *phi = gsi.phi ();
1558 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
1559 location_t orig_locus
1560 = gimple_phi_arg_location_from_edge (phi, e);
1562 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
1565 redirect_edge_and_branch_force (e, new_preheader);
1566 flush_pending_stmts (e);
1567 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
1568 if (was_imm_dom || duplicate_outer_loop)
1569 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1571 /* And remove the unnecessary forwarder again. Keep the other
1572 one so we have a proper pre-header for the loop at the exit edge. */
1573 redirect_edge_pred (single_succ_edge (preheader),
1574 single_pred (preheader));
1575 delete_basic_block (preheader);
1576 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1577 loop_preheader_edge (scalar_loop)->src);
1579 else /* Add the copy at entry. */
1581 if (scalar_loop != loop)
1583 /* Remove the unnecessary forwarder of scalar_loop again. */
1584 redirect_edge_pred (single_succ_edge (preheader),
1585 single_pred (preheader));
1586 delete_basic_block (preheader);
1587 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1588 loop_preheader_edge (scalar_loop)->src);
1589 preheader = split_edge (loop_preheader_edge (loop));
1590 entry_e = single_pred_edge (preheader);
1593 redirect_edge_and_branch_force (entry_e, new_preheader);
1594 flush_pending_stmts (entry_e);
1595 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1597 redirect_edge_and_branch_force (new_exit, preheader);
1598 flush_pending_stmts (new_exit);
1599 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1601 /* And remove the unnecessary forwarder again. Keep the other
1602 one so we have a proper pre-header for the loop at the exit edge. */
1603 redirect_edge_pred (single_succ_edge (new_preheader),
1604 single_pred (new_preheader));
1605 delete_basic_block (new_preheader);
1606 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1607 loop_preheader_edge (new_loop)->src);
1610 if (scalar_loop != loop)
1612 /* Update new_loop->header PHIs, so that on the preheader
1613 edge they are the ones from loop rather than scalar_loop. */
1614 gphi_iterator gsi_orig, gsi_new;
1615 edge orig_e = loop_preheader_edge (loop);
1616 edge new_e = loop_preheader_edge (new_loop);
1618 for (gsi_orig = gsi_start_phis (loop->header),
1619 gsi_new = gsi_start_phis (new_loop->header);
1620 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
1621 gsi_next (&gsi_orig), gsi_next (&gsi_new))
1623 gphi *orig_phi = gsi_orig.phi ();
1624 gphi *new_phi = gsi_new.phi ();
1625 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1626 location_t orig_locus
1627 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1629 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
1633 free (new_bbs);
1634 free (bbs);
1636 checking_verify_dominators (CDI_DOMINATORS);
1638 return new_loop;
1642 /* Given the condition expression COND, put it as the last statement of
1643 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1644 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1645 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1646 new edge as irreducible if IRREDUCIBLE_P is true. */
1648 static edge
1649 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1650 basic_block guard_to, basic_block dom_bb,
1651 profile_probability probability, bool irreducible_p)
1653 gimple_stmt_iterator gsi;
1654 edge new_e, enter_e;
1655 gcond *cond_stmt;
1656 gimple_seq gimplify_stmt_list = NULL;
1658 enter_e = EDGE_SUCC (guard_bb, 0);
1659 enter_e->flags &= ~EDGE_FALLTHRU;
1660 enter_e->flags |= EDGE_FALSE_VALUE;
1661 gsi = gsi_last_bb (guard_bb);
1663 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
1664 is_gimple_condexpr_for_cond, NULL_TREE);
1665 if (gimplify_stmt_list)
1666 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1668 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
1669 gsi = gsi_last_bb (guard_bb);
1670 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1672 /* Add new edge to connect guard block to the merge/loop-exit block. */
1673 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
1675 new_e->probability = probability;
1676 if (irreducible_p)
1677 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1679 enter_e->probability = probability.invert ();
1680 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
1682 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1683 if (enter_e->dest->loop_father->header == enter_e->dest)
1684 split_edge (enter_e);
1686 return new_e;
1690 /* This function verifies that the following restrictions apply to LOOP:
1691 (1) it consists of exactly 2 basic blocks - a header and an empty latch -
1692 for an innermost loop, and of 5 basic blocks for an outer loop.
1693 (2) it is single entry, single exit
1694 (3) its exit condition is the last stmt in the header
1695 (4) E is the entry/exit edge of LOOP.
1698 bool
1699 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
1701 edge exit_e = single_exit (loop);
1702 edge entry_e = loop_preheader_edge (loop);
1703 gcond *orig_cond = get_loop_exit_condition (loop);
1704 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1705 unsigned int num_bb = loop->inner? 5 : 2;
1707 /* All loops have an outer scope; the only case in which loop->outer is
1708 NULL is for the function itself. */
1709 if (!loop_outer (loop)
1710 || loop->num_nodes != num_bb
1711 || !empty_block_p (loop->latch)
1712 || !single_exit (loop)
1713 /* Verify that new loop exit condition can be trivially modified. */
1714 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1715 || (e != exit_e && e != entry_e))
1716 return false;
1718 basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
1719 get_loop_body_with_size (loop, bbs, loop->num_nodes);
1720 bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
1721 free (bbs);
1722 return ret;
1725 /* Function vect_get_loop_location.
1727 Extract the location of the loop in the source code.
1728 If the loop is not well formed for vectorization, an estimated
1729 location is calculated.
1730 Return the loop location on success and NULL otherwise. */
1732 dump_user_location_t
1733 find_loop_location (class loop *loop)
1735 gimple *stmt = NULL;
1736 basic_block bb;
1737 gimple_stmt_iterator si;
1739 if (!loop)
1740 return dump_user_location_t ();
1742 stmt = get_loop_exit_condition (loop);
1744 if (stmt
1745 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1746 return stmt;
1748 /* If we got here the loop is probably not "well formed",
1749 try to estimate the loop location. */
1751 if (!loop->header)
1752 return dump_user_location_t ();
1754 bb = loop->header;
1756 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1758 stmt = gsi_stmt (si);
1759 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1760 return stmt;
1763 return dump_user_location_t ();
1766 /* Return true if the phi described by STMT_INFO defines an IV of the
1767 loop to be vectorized. */
1769 static bool
1770 iv_phi_p (stmt_vec_info stmt_info)
1772 gphi *phi = as_a <gphi *> (stmt_info->stmt);
1773 if (virtual_operand_p (PHI_RESULT (phi)))
1774 return false;
1776 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1777 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1778 return false;
1780 return true;
1783 /* Return true if vectorizer can peel for nonlinear iv. */
1784 static bool
1785 vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
1786 enum vect_induction_op_type induction_type)
1788 tree niters_skip;
1789 /* Init_expr will be updated by vect_update_ivs_after_vectorizer,
1790 if niters or vf is unknown:
1791 For shift, when the shift count >= precision, the behavior is undefined.
1792 For mult, we don't know how to generate
1793 init_expr * pow (step, niters) for variable niters.
1794 For neg, it should be ok, since niters of the vectorized main loop
1795 will always be a multiple of 2. */
1796 if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1797 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
1798 && induction_type != vect_step_op_neg)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "Peeling for epilogue is not supported"
1803 " for nonlinear induction except neg"
1804 " when iteration count is unknown.\n");
1805 return false;
1808 /* Also doesn't support peeling for neg when niter is variable.
1809 ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
1810 niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
1811 if ((niters_skip != NULL_TREE
1812 && TREE_CODE (niters_skip) != INTEGER_CST)
1813 || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
1814 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
1816 if (dump_enabled_p ())
1817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1818 "Peeling for alignement is not supported"
1819 " for nonlinear induction when niters_skip"
1820 " is not constant.\n");
1821 return false;
1824 return true;
1827 /* Function vect_can_advance_ivs_p
1829 In case the number of iterations that LOOP iterates is unknown at compile
1830 time, an epilog loop will be generated, and the loop induction variables
1831 (IVs) will be "advanced" to the value they are supposed to take just before
1832 the epilog loop. Here we check that the access function of the loop IVs
1833 and the expression that represents the loop bound are simple enough.
1834 These restrictions will be relaxed in the future. */
1836 bool
1837 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1839 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1840 basic_block bb = loop->header;
1841 gphi_iterator gsi;
1843 /* Analyze phi functions of the loop header. */
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1847 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1849 tree evolution_part;
1850 enum vect_induction_op_type induction_type;
1852 gphi *phi = gsi.phi ();
1853 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1856 phi_info->stmt);
1858 /* Skip virtual phi's. The data dependences that are associated with
1859 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
1861 Skip reduction phis. */
1862 if (!iv_phi_p (phi_info))
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_NOTE, vect_location,
1866 "reduc or virtual phi. skip.\n");
1867 continue;
1870 induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
1871 if (induction_type != vect_step_op_add)
1873 if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, induction_type))
1874 return false;
1876 continue;
1879 /* Analyze the evolution function. */
1881 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
1882 if (evolution_part == NULL_TREE)
1884 if (dump_enabled_p ())
1885 dump_printf (MSG_MISSED_OPTIMIZATION,
1886 "No access function or evolution.\n");
1887 return false;
1890 /* FORNOW: We do not transform initial conditions of IVs
1891 which evolution functions are not invariants in the loop. */
1893 if (!expr_invariant_in_loop_p (loop, evolution_part))
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1897 "evolution not invariant in loop.\n");
1898 return false;
1901 /* FORNOW: We do not transform initial conditions of IVs
1902 which evolution functions are a polynomial of degree >= 2. */
1904 if (tree_is_chrec (evolution_part))
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "evolution is chrec.\n");
1909 return false;
1913 return true;
1917 /* Function vect_update_ivs_after_vectorizer.
1919 "Advance" the induction variables of LOOP to the value they should take
1920 after the execution of LOOP. This is currently necessary because the
1921 vectorizer does not handle induction variables that are used after the
1922 loop. Such a situation occurs when the last iterations of LOOP are
1923 peeled, because:
1924 1. We introduced new uses after LOOP for IVs that were not originally used
1925 after LOOP: the IVs of LOOP are now used by an epilog loop.
1926 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1927 times, whereas the loop IVs should be bumped N times.
1929 Input:
1930 - LOOP - a loop that is going to be vectorized. The last few iterations
1931 of LOOP were peeled.
1932 - NITERS - the number of iterations that LOOP executes (before it is
1933 vectorized). i.e, the number of times the ivs should be bumped.
1934 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1935 coming out from LOOP on which there are uses of the LOOP ivs
1936 (this is the path from LOOP->exit to epilog_loop->preheader).
1938 The new definitions of the ivs are placed in LOOP->exit.
1939 The phi args associated with the edge UPDATE_E in the bb
1940 UPDATE_E->dest are updated accordingly.
1942 Assumption 1: Like the rest of the vectorizer, this function assumes
1943 a single loop exit that has a single predecessor.
1945 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1946 organized in the same order.
1948 Assumption 3: The access function of the ivs is simple enough (see
1949 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1951 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1952 coming out of LOOP on which the ivs of LOOP are used (this is the path
1953 that leads to the epilog loop; other paths skip the epilog loop). This
1954 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1955 needs to have its phis updated.
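/* A minimal illustration (not part of the original comment; the IV below is
   hypothetical): for a simple additive IV

     i_1 = PHI <0 (preheader), i_2 (latch)>;  i_2 = i_1 + 4;

   and NITERS = n, the code below computes ni = init_expr + n * step
   = 0 + n * 4 and installs it on UPDATE_E, so the epilog loop's PHI for
   this IV starts exactly where the scalar loop would have left it.  */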
1958 static void
1959 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
1960 tree niters, edge update_e)
1962 gphi_iterator gsi, gsi1;
1963 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1964 basic_block update_bb = update_e->dest;
1965 basic_block exit_bb = single_exit (loop)->dest;
1967 /* Make sure there exists a single-predecessor exit bb: */
1968 gcc_assert (single_pred_p (exit_bb));
1969 gcc_assert (single_succ_edge (exit_bb) == update_e);
1971 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1972 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1973 gsi_next (&gsi), gsi_next (&gsi1))
1975 tree init_expr;
1976 tree step_expr, off;
1977 tree type;
1978 tree var, ni, ni_name;
1979 gimple_stmt_iterator last_gsi;
1981 gphi *phi = gsi.phi ();
1982 gphi *phi1 = gsi1.phi ();
1983 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1984 if (dump_enabled_p ())
1985 dump_printf_loc (MSG_NOTE, vect_location,
1986 "vect_update_ivs_after_vectorizer: phi: %G",
1987 (gimple *) phi);
1989 /* Skip reduction and virtual phis. */
1990 if (!iv_phi_p (phi_info))
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE, vect_location,
1994 "reduc or virtual phi. skip.\n");
1995 continue;
1998 type = TREE_TYPE (gimple_phi_result (phi));
1999 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2000 step_expr = unshare_expr (step_expr);
2002 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2003 of degree >= 2 or exponential. */
2004 gcc_assert (!tree_is_chrec (step_expr));
2006 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2007 gimple_seq stmts = NULL;
2008 enum vect_induction_op_type induction_type
2009 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2011 if (induction_type == vect_step_op_add)
2013 tree stype = TREE_TYPE (step_expr);
2014 off = fold_build2 (MULT_EXPR, stype,
2015 fold_convert (stype, niters), step_expr);
2016 if (POINTER_TYPE_P (type))
2017 ni = fold_build_pointer_plus (init_expr, off);
2018 else
2019 ni = fold_convert (type,
2020 fold_build2 (PLUS_EXPR, stype,
2021 fold_convert (stype, init_expr),
2022 off));
2024 /* Don't bother calling vect_peel_nonlinear_iv_init. */
2025 else if (induction_type == vect_step_op_neg)
2026 ni = init_expr;
2027 else
2028 ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
2029 niters, step_expr,
2030 induction_type);
2032 var = create_tmp_var (type, "tmp");
2034 last_gsi = gsi_last_bb (exit_bb);
2035 gimple_seq new_stmts = NULL;
2036 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
2037 /* Exit_bb shouldn't be empty. */
2038 if (!gsi_end_p (last_gsi))
2040 gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
2041 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
2043 else
2045 gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
2046 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
2049 /* Fix phi expressions in the successor bb. */
2050 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
2054 /* Return a gimple value containing the misalignment (measured in vector
2055 elements) for the loop described by LOOP_VINFO, i.e. how many elements
2056 it is away from a perfectly aligned address. Add any new statements
2057 to SEQ. */
2059 static tree
2060 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
2062 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2063 stmt_vec_info stmt_info = dr_info->stmt;
2064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2066 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2067 unsigned HOST_WIDE_INT target_align_c;
2068 tree target_align_minus_1;
2070 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2071 size_zero_node) < 0;
2072 tree offset = (negative
2073 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2074 * TREE_INT_CST_LOW
2075 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
2076 : size_zero_node);
2077 tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
2078 stmt_info, seq,
2079 offset);
2080 tree type = unsigned_type_for (TREE_TYPE (start_addr));
2081 if (target_align.is_constant (&target_align_c))
2082 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
2083 else
2085 tree vla = build_int_cst (type, target_align);
2086 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
2087 fold_build2 (MINUS_EXPR, type,
2088 build_int_cst (type, 0), vla));
2089 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
2090 build_int_cst (type, 1));
2093 HOST_WIDE_INT elem_size
2094 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2095 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2097 /* Create: misalign_in_bytes = addr & (target_align - 1). */
2098 tree int_start_addr = fold_convert (type, start_addr);
2099 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
2100 target_align_minus_1);
2102 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
2103 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
2104 elem_size_log);
2106 return misalign_in_elems;
2109 /* Function vect_gen_prolog_loop_niters
2111 Generate the number of iterations which should be peeled as prolog for the
2112 loop represented by LOOP_VINFO. It is calculated as the misalignment of
2113 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
2114 As a result, after the execution of this loop, the data reference DR will
2115 refer to an aligned location. The following computation is generated:
2117 If the misalignment of DR is known at compile time:
2118 addr_mis = int mis = DR_MISALIGNMENT (dr);
2119 Else, compute address misalignment in bytes:
2120 addr_mis = addr & (target_align - 1)
2122 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
2124 (elem_size = element type size; an element is the scalar element whose type
2125 is the inner type of the vectype)
2127 The computations will be emitted at the end of BB. We also compute and
2128 store the upper bound (inclusive) of the result in BOUND.
2130 When the step of the data-ref in the loop is not 1 (as in interleaved data
2131 and SLP), the number of iterations of the prolog must be divided by the step
2132 (which is equal to the size of interleaved group).
2134 The above formulas assume that VF == number of elements in the vector. This
2135 may not hold when there are multiple-types in the loop.
2136 In this case, for some data-references in the loop the VF does not represent
2137 the number of elements that fit in the vector. Therefore, instead of VF we
2138 use TYPE_VECTOR_SUBPARTS. */
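/* A worked instance of the formula above (illustrative values only):
   with elem_size = 4, target_align = 16 (i.e. VF = 4 elements), step = 1
   and addr & (target_align - 1) = 8 we get addr_mis / elem_size = 2, so

     prolog_niters = ((4 - 2) & (4 - 1)) / 1 = 2

   and peeling two scalar iterations brings DR to a 16-byte aligned
   address.  */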
2140 static tree
2141 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
2142 basic_block bb, int *bound)
2144 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2145 tree var;
2146 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2147 gimple_seq stmts = NULL, new_stmts = NULL;
2148 tree iters, iters_name;
2149 stmt_vec_info stmt_info = dr_info->stmt;
2150 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2151 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2153 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2155 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_NOTE, vect_location,
2159 "known peeling = %d.\n", npeel);
2161 iters = build_int_cst (niters_type, npeel);
2162 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2164 else
2166 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
2167 tree type = TREE_TYPE (misalign_in_elems);
2168 HOST_WIDE_INT elem_size
2169 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2170 /* We only do prolog peeling if the target alignment is known at compile
2171 time. */
2172 poly_uint64 align_in_elems =
2173 exact_div (target_align, elem_size);
2174 tree align_in_elems_minus_1 =
2175 build_int_cst (type, align_in_elems - 1);
2176 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
2178 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
2179 & (align_in_elems - 1)). */
2180 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2181 size_zero_node) < 0;
2182 if (negative)
2183 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
2184 align_in_elems_tree);
2185 else
2186 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
2187 misalign_in_elems);
2188 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
2189 iters = fold_convert (niters_type, iters);
2190 unsigned HOST_WIDE_INT align_in_elems_c;
2191 if (align_in_elems.is_constant (&align_in_elems_c))
2192 *bound = align_in_elems_c - 1;
2193 else
2194 *bound = -1;
2197 if (dump_enabled_p ())
2198 dump_printf_loc (MSG_NOTE, vect_location,
2199 "niters for prolog loop: %T\n", iters);
2201 var = create_tmp_var (niters_type, "prolog_loop_niters");
2202 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
2204 if (new_stmts)
2205 gimple_seq_add_seq (&stmts, new_stmts);
2206 if (stmts)
2208 gcc_assert (single_succ_p (bb));
2209 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2210 if (gsi_end_p (gsi))
2211 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2212 else
2213 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2215 return iters_name;
2219 /* Function vect_update_init_of_dr
2221 If CODE is PLUS, the vector loop starts NITERS iterations after the
2222 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
2223 iterations before the scalar one (using masking to skip inactive
2224 elements). This function updates the information recorded in DR to
2225 account for the difference. Specifically, it updates the OFFSET
2226 field of DR_INFO. */
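/* For illustration (hypothetical values): with DR_STEP (dr) = 4 bytes,
   NITERS = n and CODE = PLUS_EXPR, the update below amounts to

     dr_info->offset = offset + n * 4

   so the first access of the vector loop starts right after the last
   access of the peeled prolog iterations.  */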
2228 static void
2229 vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
2231 struct data_reference *dr = dr_info->dr;
2232 tree offset = dr_info->offset;
2233 if (!offset)
2234 offset = build_zero_cst (sizetype);
2236 niters = fold_build2 (MULT_EXPR, sizetype,
2237 fold_convert (sizetype, niters),
2238 fold_convert (sizetype, DR_STEP (dr)));
2239 offset = fold_build2 (code, sizetype,
2240 fold_convert (sizetype, offset), niters);
2241 dr_info->offset = offset;
2245 /* Function vect_update_inits_of_drs
2247 Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
2248 CODE and NITERS are as for vect_update_init_of_dr. */
2250 void
2251 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
2252 tree_code code)
2254 unsigned int i;
2255 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2256 struct data_reference *dr;
2258 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
2260 /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
2261 here, but since we might use these niters to update the epilogue's niters
2262 and data references we can't insert them here as this definition might not
2263 always dominate its uses. */
2264 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
2265 niters = fold_convert (sizetype, niters);
2267 FOR_EACH_VEC_ELT (datarefs, i, dr)
2269 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2270 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
2271 && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
2272 vect_update_init_of_dr (dr_info, niters, code);
2276 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
2277 by masking. This involves calculating the number of iterations to
2278 be peeled and then aligning all memory references appropriately. */
2280 void
2281 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
2283 tree misalign_in_elems;
2284 tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2286 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
2288 /* From the information recorded in LOOP_VINFO get the number of iterations
2289 that need to be skipped via masking. */
2290 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2292 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2293 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
2294 misalign_in_elems = build_int_cst (type, misalign);
2296 else
2298 gimple_seq seq1 = NULL, seq2 = NULL;
2299 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2300 misalign_in_elems = fold_convert (type, misalign_in_elems);
2301 misalign_in_elems = force_gimple_operand (misalign_in_elems,
2302 &seq2, true, NULL_TREE);
2303 gimple_seq_add_seq (&seq1, seq2);
2304 if (seq1)
2306 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2307 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
2308 gcc_assert (!new_bb);
2312 if (dump_enabled_p ())
2313 dump_printf_loc (MSG_NOTE, vect_location,
2314 "misalignment for fully-masked loop: %T\n",
2315 misalign_in_elems);
2317 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
2319 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2322 /* This function builds ni_name = number of iterations. Statements
2323 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
2324 it to TRUE if a new ssa_var is generated. */
2326 tree
2327 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
2329 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2330 if (TREE_CODE (ni) == INTEGER_CST)
2331 return ni;
2332 else
2334 tree ni_name, var;
2335 gimple_seq stmts = NULL;
2336 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2338 var = create_tmp_var (TREE_TYPE (ni), "niters");
2339 ni_name = force_gimple_operand (ni, &stmts, false, var);
2340 if (stmts)
2342 gsi_insert_seq_on_edge_immediate (pe, stmts);
2343 if (new_var_p != NULL)
2344 *new_var_p = true;
2347 return ni_name;
2351 /* Calculate the number of iterations above which the vectorized loop will
2352 be preferred over the scalar loop. NITERS_PROLOG is the number of iterations
2353 of prolog loop. If it's integer const, the integer number is also passed
2354 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
2355 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
2356 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
2357 threshold below which the scalar (rather than vectorized) loop will be
2358 executed. This function stores the upper bound (inclusive) of the result
2359 in BOUND_SCALAR. */
2361 static tree
2362 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
2363 int bound_prolog, poly_int64 bound_epilog, int th,
2364 poly_uint64 *bound_scalar,
2365 bool check_profitability)
2367 tree type = TREE_TYPE (niters_prolog);
2368 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
2369 build_int_cst (type, bound_epilog));
2371 *bound_scalar = bound_prolog + bound_epilog;
2372 if (check_profitability)
2374 /* TH indicates the minimum niters of vectorized loop, while we
2375 compute the maximum niters of scalar loop. */
2376 th--;
2377 /* Peeling a constant number of times. */
2378 if (int_niters_prolog >= 0)
2380 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2381 return build_int_cst (type, *bound_scalar);
2383 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
2384 and BOUND_EPILOG are inclusive upper bounds. */
2385 if (known_ge (th, bound_prolog + bound_epilog))
2387 *bound_scalar = th;
2388 return build_int_cst (type, th);
2390 /* Need to do runtime comparison. */
2391 else if (maybe_gt (th, bound_epilog))
2393 *bound_scalar = upper_bound (*bound_scalar, th);
2394 return fold_build2 (MAX_EXPR, type,
2395 build_int_cst (type, th), niters);
2398 return niters;
2401 /* NITERS is the number of times that the original scalar loop executes
2402 after peeling. Work out the maximum number of iterations N that can
2403 be handled by the vectorized form of the loop and then either:
2405 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
2407 niters_vector = N
2409 b) set *STEP_VECTOR_PTR to one and generate:
2411 niters_vector = N / vf
2413 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
2414 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
2415 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
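/* Worked example for case (b) (illustrative, assuming NITERS_NO_OVERFLOW
   and no partial vectors): with NITERS = 103 and vf = 4 we generate

     niters_vector = 103 >> log2 (4) = 25,  step_vector = 1

   so the vector loop executes 25 times and the remaining
   103 - 25 * 4 = 3 iterations are left for the epilog.  */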
2417 void
2418 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
2419 tree *niters_vector_ptr, tree *step_vector_ptr,
2420 bool niters_no_overflow)
2422 tree ni_minus_gap, var;
2423 tree niters_vector, step_vector, type = TREE_TYPE (niters);
2424 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2425 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2426 tree log_vf = NULL_TREE;
2428 /* If epilogue loop is required because of data accesses with gaps, we
2429 subtract one iteration from the total number of iterations here for
2430 correct calculation of RATIO. */
2431 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2433 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
2434 build_one_cst (type));
2435 if (!is_gimple_val (ni_minus_gap))
2437 var = create_tmp_var (type, "ni_gap");
2438 gimple *stmts = NULL;
2439 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
2440 true, var);
2441 gsi_insert_seq_on_edge_immediate (pe, stmts);
2444 else
2445 ni_minus_gap = niters;
2447 /* To silence some unexpected warnings, simply initialize to 0. */
2448 unsigned HOST_WIDE_INT const_vf = 0;
2449 if (vf.is_constant (&const_vf)
2450 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2452 /* Create: niters >> log2(vf) */
2453 /* If it's known that niters == number of latch executions + 1 doesn't
2454 overflow, we can generate niters >> log2(vf); otherwise we generate
2455 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
2456 will be at least one. */
2457 log_vf = build_int_cst (type, exact_log2 (const_vf));
2458 if (niters_no_overflow)
2459 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
2460 else
2461 niters_vector
2462 = fold_build2 (PLUS_EXPR, type,
2463 fold_build2 (RSHIFT_EXPR, type,
2464 fold_build2 (MINUS_EXPR, type,
2465 ni_minus_gap,
2466 build_int_cst (type, vf)),
2467 log_vf),
2468 build_int_cst (type, 1));
2469 step_vector = build_one_cst (type);
2471 else
2473 niters_vector = ni_minus_gap;
2474 step_vector = build_int_cst (type, vf);
2477 if (!is_gimple_val (niters_vector))
2479 var = create_tmp_var (type, "bnd");
2480 gimple_seq stmts = NULL;
2481 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
2482 gsi_insert_seq_on_edge_immediate (pe, stmts);
2483 /* The peeling algorithm guarantees that the vector loop bound is at least
2484 ONE, so we set range information to make the niters analyzer's life easier.
2485 Note the latch iteration count can be TYPE_MAX_VALUE, so we have to
2486 represent the vector niter as (TYPE_MAX_VALUE + 1) >> log_vf. */
2487 if (stmts != NULL && log_vf)
2489 if (niters_no_overflow)
2491 value_range vr (type,
2492 wi::one (TYPE_PRECISION (type)),
2493 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2494 TYPE_SIGN (type)),
2495 exact_log2 (const_vf),
2496 TYPE_SIGN (type)));
2497 set_range_info (niters_vector, vr);
2499 /* For VF == 1 the vector IV might also overflow so we cannot
2500 assert a minimum value of 1. */
2501 else if (const_vf > 1)
2503 value_range vr (type,
2504 wi::one (TYPE_PRECISION (type)),
2505 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2506 TYPE_SIGN (type))
2507 - (const_vf - 1),
2508 exact_log2 (const_vf), TYPE_SIGN (type))
2509 + 1);
2510 set_range_info (niters_vector, vr);
2514 *niters_vector_ptr = niters_vector;
2515 *step_vector_ptr = step_vector;
2517 return;
2520 /* Given NITERS_VECTOR which is the number of iterations for vectorized
2521 loop specified by LOOP_VINFO after vectorization, compute the number
2522 of iterations before vectorization (niters_vector * vf) and store it
2523 to NITERS_VECTOR_MULT_VF_PTR. */
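/* For example (illustrative values): with niters_vector = 25 and vf = 4
   this computes niters_vector_mult_vf = 25 << log2 (4) = 100, the number
   of scalar iterations covered by the vector loop.  */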
2525 static void
2526 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2527 tree niters_vector,
2528 tree *niters_vector_mult_vf_ptr)
2530 /* We should be using a step_vector of VF if VF is variable. */
2531 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
2532 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2533 tree type = TREE_TYPE (niters_vector);
2534 tree log_vf = build_int_cst (type, exact_log2 (vf));
2535 basic_block exit_bb = single_exit (loop)->dest;
2537 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2538 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2539 niters_vector, log_vf);
2540 if (!is_gimple_val (niters_vector_mult_vf))
2542 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2543 gimple_seq stmts = NULL;
2544 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2545 &stmts, true, var);
2546 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2547 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2549 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2552 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
2553 this function searches for the corresponding lcssa phi node in exit
2554 bb of LOOP. If it is found, return the phi result; otherwise return
2555 NULL. */
2557 static tree
2558 find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
2559 gphi *lcssa_phi)
2561 gphi_iterator gsi;
2562 edge e = single_exit (loop);
2564 gcc_assert (single_pred_p (e->dest));
2565 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2567 gphi *phi = gsi.phi ();
2568 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2569 PHI_ARG_DEF (lcssa_phi, 0), 0))
2570 return PHI_RESULT (phi);
2572 return NULL_TREE;
2575 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
2576 from SECOND/FIRST and puts it at the original loop's preheader/exit
2577 edge, the two loops are arranged as below:
2579 preheader_a:
2580 first_loop:
2581 header_a:
2582 i_1 = PHI<i_0, i_2>;
2584 i_2 = i_1 + 1;
2585 if (cond_a)
2586 goto latch_a;
2587 else
2588 goto between_bb;
2589 latch_a:
2590 goto header_a;
2592 between_bb:
2593 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2595 second_loop:
2596 header_b:
2597 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2598 or with i_2 if no LCSSA phi is created
2599 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2601 i_4 = i_3 + 1;
2602 if (cond_b)
2603 goto latch_b;
2604 else
2605 goto exit_bb;
2606 latch_b:
2607 goto header_b;
2609 exit_bb:
2611 This function creates loop closed SSA for the first loop; it updates the
2612 second loop's PHI nodes by replacing the argument on the incoming edge with
2613 the result of the newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
2614 is false, loop closed SSA phis will only be created for non-iv phis of
2615 the first loop.
2617 This function assumes exit bb of the first loop is preheader bb of the
2618 second loop, i.e, between_bb in the example code. With PHIs updated,
2619 the second loop will execute the remaining iterations of the first. */
2621 static void
2622 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2623 class loop *first, class loop *second,
2624 bool create_lcssa_for_iv_phis)
2626 gphi_iterator gsi_update, gsi_orig;
2627 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2629 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2630 edge second_preheader_e = loop_preheader_edge (second);
2631 basic_block between_bb = single_exit (first)->dest;
2633 gcc_assert (between_bb == second_preheader_e->src);
2634 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2635 /* Either the first loop or the second is the loop to be vectorized. */
2636 gcc_assert (loop == first || loop == second);
2638 for (gsi_orig = gsi_start_phis (first->header),
2639 gsi_update = gsi_start_phis (second->header);
2640 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2641 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2643 gphi *orig_phi = gsi_orig.phi ();
2644 gphi *update_phi = gsi_update.phi ();
2646 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2647 /* Generate lcssa PHI node for the first loop. */
2648 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
2649 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
2650 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
2652 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2653 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2654 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2655 arg = new_res;
2658 /* Update PHI node in the second loop by replacing arg on the loop's
2659 incoming edge. */
2660 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2663 /* For epilogue peeling we have to make sure to copy all LC PHIs
2664 for correct vectorization of live stmts. */
2665 if (loop == first)
2667 basic_block orig_exit = single_exit (second)->dest;
2668 for (gsi_orig = gsi_start_phis (orig_exit);
2669 !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
2671 gphi *orig_phi = gsi_orig.phi ();
2672 tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
2673 if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
2674 continue;
2676 /* Already created in the above loop. */
2677 if (find_guard_arg (first, second, orig_phi))
2678 continue;
2680 tree new_res = copy_ssa_name (orig_arg);
2681 gphi *lcphi = create_phi_node (new_res, between_bb);
2682 add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
2687 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2688 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2689 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2690 appear like below:
2692 guard_bb:
2693 if (cond)
2694 goto merge_bb;
2695 else
2696 goto skip_loop;
2698 skip_loop:
2699 header_a:
2700 i_1 = PHI<i_0, i_2>;
2702 i_2 = i_1 + 1;
2703 if (cond_a)
2704 goto latch_a;
2705 else
2706 goto exit_a;
2707 latch_a:
2708 goto header_a;
2710 exit_a:
2711 i_5 = PHI<i_2>;
2713 merge_bb:
2714 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2716 update_loop:
2717 header_b:
2718 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2720 i_4 = i_3 + 1;
2721 if (cond_b)
2722 goto latch_b;
2723 else
2724 goto exit_bb;
2725 latch_b:
2726 goto header_b;
2728 exit_bb:
2730 This function creates PHI nodes at merge_bb and replaces the use of i_5
2731 in the update_loop's PHI node with the result of the new PHI. */
2733 static void
2734 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
2735 class loop *update_loop,
2736 edge guard_edge, edge merge_edge)
2738 location_t merge_loc, guard_loc;
2739 edge orig_e = loop_preheader_edge (skip_loop);
2740 edge update_e = loop_preheader_edge (update_loop);
2741 gphi_iterator gsi_orig, gsi_update;
2743 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2744 gsi_update = gsi_start_phis (update_loop->header));
2745 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2746 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2748 gphi *orig_phi = gsi_orig.phi ();
2749 gphi *update_phi = gsi_update.phi ();
2751 /* Generate new phi node at merge bb of the guard. */
2752 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2753 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2755 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2756 args in NEW_PHI for these edges. */
2757 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2758 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2759 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2760 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2761 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2762 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2764 /* Update phi in UPDATE_PHI. */
2765 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2769 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2770 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2771 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2772 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2773 The CFG looks like:
2775 loop:
2776 header_a:
2777 i_1 = PHI<i_0, i_2>;
2779 i_2 = i_1 + 1;
2780 if (cond_a)
2781 goto latch_a;
2782 else
2783 goto exit_a;
2784 latch_a:
2785 goto header_a;
2787 exit_a:
2789 guard_bb:
2790 if (cond)
2791 goto merge_bb;
2792 else
2793 goto epilog_loop;
2795 ;; fall_through_bb
2797 epilog_loop:
2798 header_b:
2799 i_3 = PHI<i_2, i_4>;
2801 i_4 = i_3 + 1;
2802 if (cond_b)
2803 goto latch_b;
2804 else
2805 goto merge_bb;
2806 latch_b:
2807 goto header_b;
2809 merge_bb:
2810 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2812 exit_bb:
2813 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2815 For each name used outside EPILOG (i.e. for each name that has an lcssa
2816 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2817 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2818 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2819 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2820 in exit_bb will also be updated. */
2822 static void
2823 slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
2824 edge guard_edge, edge merge_edge)
2826 gphi_iterator gsi;
2827 basic_block merge_bb = guard_edge->dest;
2829 gcc_assert (single_succ_p (merge_bb));
2830 edge e = single_succ_edge (merge_bb);
2831 basic_block exit_bb = e->dest;
2832 gcc_assert (single_pred_p (exit_bb));
2833 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2835 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2837 gphi *update_phi = gsi.phi ();
2838 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2840 tree merge_arg = NULL_TREE;
2842 /* If the old argument is a SSA_NAME use its current_def. */
2843 if (TREE_CODE (old_arg) == SSA_NAME)
2844 merge_arg = get_current_def (old_arg);
2845 /* If it's a constant or doesn't have a current_def, just use the old
2846 argument. */
2847 if (!merge_arg)
2848 merge_arg = old_arg;
2850 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2851 /* If the var is live after loop but not a reduction, we simply
2852 use the old arg. */
2853 if (!guard_arg)
2854 guard_arg = old_arg;
2856 /* Create new phi node in MERGE_BB: */
2857 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2858 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2860 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set
2861 the two PHI args in merge_phi for these edges. */
2862 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2863 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2865 /* Update the original phi in exit_bb. */
2866 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2870 /* EPILOG loop is duplicated from the original loop for vectorizing;
2871 the args of its loop closed SSA PHIs need to be updated. */
2873 static void
2874 slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
2876 gphi_iterator gsi;
2877 basic_block exit_bb = single_exit (epilog)->dest;
2879 gcc_assert (single_pred_p (exit_bb));
2880 edge e = EDGE_PRED (exit_bb, 0);
2881 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2882 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2885 /* EPILOGUE_VINFO is an epilogue loop that we now know would need to
2886 iterate exactly CONST_NITERS times. Make a final decision about
2887 whether the epilogue loop should be used, returning true if so. */
2889 static bool
2890 vect_update_epilogue_niters (loop_vec_info epilogue_vinfo,
2891 unsigned HOST_WIDE_INT const_niters)
2893 /* Avoid wrap-around when computing const_niters - 1. Also reject
2894 using an epilogue loop for a single scalar iteration, even if
2895 we could in principle implement that using partial vectors. */
2896 unsigned int gap_niters = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
2897 if (const_niters <= gap_niters + 1)
2898 return false;
2900 /* Install the number of iterations. */
2901 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (epilogue_vinfo));
2902 tree niters_tree = build_int_cst (niters_type, const_niters);
2903 tree nitersm1_tree = build_int_cst (niters_type, const_niters - 1);
2905 LOOP_VINFO_NITERS (epilogue_vinfo) = niters_tree;
2906 LOOP_VINFO_NITERSM1 (epilogue_vinfo) = nitersm1_tree;
2908 /* Decide what to do if the number of epilogue iterations is not
2909 a multiple of the epilogue loop's vectorization factor. */
2910 return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true);
2913 /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
2914 Return a value that equals:
2916 - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
2917 - SKIP_VALUE when the main loop is skipped. */
2919 tree
2920 vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
2921 tree skip_value)
2923 gcc_assert (loop_vinfo->main_loop_edge);
2925 tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
2926 basic_block bb = loop_vinfo->main_loop_edge->dest;
2927 gphi *new_phi = create_phi_node (phi_result, bb);
2928 add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
2929 UNKNOWN_LOCATION);
2930 add_phi_arg (new_phi, skip_value,
2931 loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
2932 return phi_result;
2935 /* Function vect_do_peeling.
2937 Input:
2938 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2940 preheader:
2941 LOOP:
2942 header_bb:
2943 loop_body
2944 if (exit_loop_cond) goto exit_bb
2945 else goto header_bb
2946 exit_bb:
2948 - NITERS: The number of iterations of the loop.
2949 - NITERSM1: The number of iterations of the loop's latch.
2950 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2951 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2952 CHECK_PROFITABILITY is true.
2953 Output:
2954 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
2955 iterate after vectorization; see vect_set_loop_condition for details.
2956 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2957 should be set to the number of scalar iterations handled by the
2958 vector loop. The SSA name is only used on exit from the loop.
2960 This function peels prolog and epilog from the loop, adds guards skipping
2961 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2962 would look like:
2964 guard_bb_1:
2965 if (prefer_scalar_loop) goto merge_bb_1
2966 else goto guard_bb_2
2968 guard_bb_2:
2969 if (skip_prolog) goto merge_bb_2
2970 else goto prolog_preheader
2972 prolog_preheader:
2973 PROLOG:
2974 prolog_header_bb:
2975 prolog_body
2976 if (exit_prolog_cond) goto prolog_exit_bb
2977 else goto prolog_header_bb
2978 prolog_exit_bb:
2980 merge_bb_2:
2982 vector_preheader:
2983 VECTOR LOOP:
2984 vector_header_bb:
2985 vector_body
2986 if (exit_vector_cond) goto vector_exit_bb
2987 else goto vector_header_bb
2988 vector_exit_bb:
2990 guard_bb_3:
2991 if (skip_epilog) goto merge_bb_3
2992 else goto epilog_preheader
2994 merge_bb_1:
2996 epilog_preheader:
2997 EPILOG:
2998 epilog_header_bb:
2999 epilog_body
3000 if (exit_epilog_cond) goto merge_bb_3
3001 else goto epilog_header_bb
3003 merge_bb_3:
3005 Note this function peels the prolog and epilog, and adds the guards,
3006 only when necessary.
3007 This function returns the epilogue loop if a decision was made to vectorize
3008 it, otherwise NULL.
3010 The analysis resulting in this epilogue loop's loop_vec_info was performed
3011 in the same vect_analyze_loop call as the main loop's. At that time
3012 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
3013 vectorization factors than the main loop. This list is stored in the main
3014 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
3015 vectorize the epilogue loop for a lower vectorization factor, the
3016 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
3017 updated and linked to the epilogue loop. This is later used to vectorize
3018 the epilogue. The reason the loop_vec_info needs updating is that it was
3019 constructed based on the original main loop, and the epilogue loop is a
3020 copy of this loop, so all links pointing to statements in the original loop
3021 need updating. Furthermore, these loop_vec_infos share the
3022 data_reference's records, which will also need to be updated.
3024 TODO: Guard for prefer_scalar_loop should be emitted along with
3025 versioning conditions if loop versioning is needed. */
3028 class loop *
3029 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
3030 tree *niters_vector, tree *step_vector,
3031 tree *niters_vector_mult_vf_var, int th,
3032 bool check_profitability, bool niters_no_overflow,
3033 tree *advance)
3035 edge e, guard_e;
3036 tree type = TREE_TYPE (niters), guard_cond;
3037 basic_block guard_bb, guard_to;
3038 profile_probability prob_prolog, prob_vector, prob_epilog;
3039 int estimated_vf;
3040 int prolog_peeling = 0;
3041 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
3042 bool vect_epilogues_updated_niters = false;
3043 /* We currently do not support prolog peeling if the target alignment is not
3044 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
3045 target alignment being constant. */
3046 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3047 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
3048 return NULL;
3050 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
3051 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3053 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3054 poly_uint64 bound_epilog = 0;
3055 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
3056 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
3057 bound_epilog += vf - 1;
3058 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3059 bound_epilog += 1;
3060 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
3061 poly_uint64 bound_scalar = bound_epilog;
3063 if (!prolog_peeling && !epilog_peeling)
3064 return NULL;
3066 /* Before doing any peeling make sure to reset debug binds outside of
3067 the loop referring to defs not in LC SSA. */
3068 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3069 for (unsigned i = 0; i < loop->num_nodes; ++i)
3071 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
3072 imm_use_iterator ui;
3073 gimple *use_stmt;
3074 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
3075 gsi_next (&gsi))
3077 FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
3078 if (gimple_debug_bind_p (use_stmt)
3079 && loop != gimple_bb (use_stmt)->loop_father
3080 && !flow_loop_nested_p (loop,
3081 gimple_bb (use_stmt)->loop_father))
3083 gimple_debug_bind_reset_value (use_stmt);
3084 update_stmt (use_stmt);
3087 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3088 gsi_next (&gsi))
3090 ssa_op_iter op_iter;
3091 def_operand_p def_p;
3092 FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
3093 FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
3094 if (gimple_debug_bind_p (use_stmt)
3095 && loop != gimple_bb (use_stmt)->loop_father
3096 && !flow_loop_nested_p (loop,
3097 gimple_bb (use_stmt)->loop_father))
3099 gimple_debug_bind_reset_value (use_stmt);
3100 update_stmt (use_stmt);
3105 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
3106 estimated_vf = vect_vf_for_cost (loop_vinfo);
3107 if (estimated_vf == 2)
3108 estimated_vf = 3;
3109 prob_prolog = prob_epilog = profile_probability::guessed_always ()
3110 .apply_scale (estimated_vf - 1, estimated_vf);
3112 class loop *prolog, *epilog = NULL;
3113 class loop *first_loop = loop;
3114 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
3116 /* SSA form needs to be up-to-date since we are going to manually
3117 update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
3118 update SSA state after that, so we have to make sure to not lose any
3119 pending update needs. */
3120 gcc_assert (!need_ssa_update_p (cfun));
3122 /* If we're vectorizing an epilogue loop, we have ensured that the
3123 virtual operand is in SSA form throughout the vectorized main loop.
3124 Normally it is possible to trace the updated
3125 vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
3126 back to scalar-stmt vuses, meaning that the effect of the SSA update
3127 remains local to the main loop. However, there are rare cases in
3128 which the vectorized loop should have vdefs even when the original scalar
3129 loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
3130 introduces clobbers of the temporary vector array, which in turn
3131 needs new vdefs. If the scalar loop doesn't write to memory, these
3132 new vdefs will be the only ones in the vector loop.
3133 We are currently deferring the update of virtual SSA form and the creation
3134 of a virtual PHI for this case so we do not have to make sure the
3135 newly introduced virtual def is in LCSSA form. */
3137 if (MAY_HAVE_DEBUG_BIND_STMTS)
3139 gcc_assert (!adjust_vec.exists ());
3140 adjust_vec.create (32);
3142 initialize_original_copy_tables ();
3144 /* Record the anchor bb at which the guard should be placed if the scalar
3145 loop might be preferred. */
3146 basic_block anchor = loop_preheader_edge (loop)->src;
3148 /* Generate the number of iterations for the prolog loop. We do this here
3149 so that we can also get the upper bound on the number of iterations. */
3150 tree niters_prolog;
3151 int bound_prolog = 0;
3152 if (prolog_peeling)
3153 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
3154 &bound_prolog);
3155 else
3156 niters_prolog = build_int_cst (type, 0);
3158 loop_vec_info epilogue_vinfo = NULL;
3159 if (vect_epilogues)
3161 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
3162 loop_vinfo->epilogue_vinfos.ordered_remove (0);
3165 tree niters_vector_mult_vf = NULL_TREE;
3166 /* Saving NITERs before the loop, as this may be changed by prologue. */
3167 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
3168 edge update_e = NULL, skip_e = NULL;
3169 unsigned int lowest_vf = constant_lower_bound (vf);
3170 /* If we know the number of scalar iterations for the main loop we should
3171 check whether after the main loop there are enough iterations left over
3172 for the epilogue. */
3173 if (vect_epilogues
3174 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3175 && prolog_peeling >= 0
3176 && known_eq (vf, lowest_vf))
3178 unsigned HOST_WIDE_INT eiters
3179 = (LOOP_VINFO_INT_NITERS (loop_vinfo)
3180 - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
3182 eiters -= prolog_peeling;
3183 eiters
3184 = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
3186 while (!vect_update_epilogue_niters (epilogue_vinfo, eiters))
3188 delete epilogue_vinfo;
3189 epilogue_vinfo = NULL;
3190 if (loop_vinfo->epilogue_vinfos.length () == 0)
3192 vect_epilogues = false;
3193 break;
3195 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
3196 loop_vinfo->epilogue_vinfos.ordered_remove (0);
3198 vect_epilogues_updated_niters = true;
3200 /* Prolog loop may be skipped. */
3201 bool skip_prolog = (prolog_peeling != 0);
3202 /* Skip this loop and go to the epilog when there are not enough iterations
3203 to enter the vectorized loop. If true we should perform runtime checks on
3204 the NITERS to decide whether we should skip the current vectorized loop.
3205 If we know the number of scalar iterations, we only add such a runtime
3206 check when that number may be smaller than the number of iterations
3207 required to enter this loop; for this we use the upper bounds on the
3208 prolog and epilog peeling. When we don't know the number of iterations
3209 and don't require versioning, the skip is not necessary because we have
3210 asserted that there are enough scalar iterations to enter the main loop.
3211 When we are versioning we only add such a skip if we have chosen to
3212 vectorize the epilogue. */
3214 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3215 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
3216 bound_prolog + bound_epilog)
3217 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
3218 || vect_epilogues));
3219 /* Epilog loop must be executed if the number of iterations for epilog
3220 loop is known at compile time, otherwise we need to add a check at
3221 the end of vector loop and skip to the end of epilog loop. */
3222 bool skip_epilog = (prolog_peeling < 0
3223 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3224 || !vf.is_constant ());
3225 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
3226 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3227 skip_epilog = false;
3229 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3230 auto_vec<profile_count> original_counts;
3231 basic_block *original_bbs = NULL;
3233 if (skip_vector)
3235 split_edge (loop_preheader_edge (loop));
3237 if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
3239 original_bbs = get_loop_body (loop);
3240 for (unsigned int i = 0; i < loop->num_nodes; i++)
3241 original_counts.safe_push(original_bbs[i]->count);
3244 /* Due to the order in which we peel prolog and epilog, we first
3245 propagate probability to the whole loop. The purpose is to
3246 avoid adjusting probabilities of both prolog and vector loops
3247 separately. Note in this case, the probability of epilog loop
3248 needs to be scaled back later. */
3249 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
3250 if (prob_vector.initialized_p ())
3252 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
3253 scale_loop_profile (loop, prob_vector, 0);
3257 if (vect_epilogues)
3258 /* Make sure to set the epilogue's epilogue scalar loop, such that we can
3259 use the original scalar loop as remaining epilogue if necessary. */
3260 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
3261 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3263 if (prolog_peeling)
3265 e = loop_preheader_edge (loop);
3266 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e));
3268 /* Peel prolog and put it on preheader edge of loop. */
3269 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
3270 gcc_assert (prolog);
3271 prolog->force_vectorize = false;
3272 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
3273 first_loop = prolog;
3274 reset_original_copy_tables ();
3276 /* Update the number of iterations for prolog loop. */
3277 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
3278 vect_set_loop_condition (prolog, NULL, niters_prolog,
3279 step_prolog, NULL_TREE, false);
3281 /* Skip the prolog loop. */
3282 if (skip_prolog)
3284 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3285 niters_prolog, build_int_cst (type, 0));
3286 guard_bb = loop_preheader_edge (prolog)->src;
3287 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
3288 guard_to = split_edge (loop_preheader_edge (loop));
3289 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3290 guard_to, guard_bb,
3291 prob_prolog.invert (),
3292 irred_flag);
3293 e = EDGE_PRED (guard_to, 0);
3294 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3295 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3297 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
3298 scale_loop_profile (prolog, prob_prolog, bound_prolog);
3301 /* Update init address of DRs. */
3302 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3303 /* Update niters for vector loop. */
3304 LOOP_VINFO_NITERS (loop_vinfo)
3305 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
3306 LOOP_VINFO_NITERSM1 (loop_vinfo)
3307 = fold_build2 (MINUS_EXPR, type,
3308 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3309 bool new_var_p = false;
3310 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
3311 /* It's guaranteed that vector loop bound before vectorization is at
3312 least VF, so set range information for newly generated var. */
3313 if (new_var_p)
3315 value_range vr (type,
3316 wi::to_wide (build_int_cst (type, lowest_vf)),
3317 wi::to_wide (TYPE_MAX_VALUE (type)));
3318 set_range_info (niters, vr);
3321       /* The prolog iterates at most bound_prolog times, so its latch iterates
3322 	 at most bound_prolog - 1 times.  */
3323 record_niter_bound (prolog, bound_prolog - 1, false, true);
3324 delete_update_ssa ();
3325 adjust_vec_debug_stmts ();
3326 scev_reset ();
3329 if (epilog_peeling)
3331 e = single_exit (loop);
3332 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e));
3334 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
3335 said epilog then we should use a copy of the main loop as a starting
3336 point. This loop may have already had some preliminary transformations
3337 to allow for more optimal vectorization, for example if-conversion.
3338 If we are not vectorizing the epilog then we should use the scalar loop
3339 	 as the transformations mentioned above make little or no sense when not
3340 	 vectorizing.  */
3341 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
3342 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
3343 gcc_assert (epilog);
3345 epilog->force_vectorize = false;
3346 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
3348       /* The scalar version of the loop may be preferred.  In this case, add
3349 	 a guard and skip to the epilog.  Note this only happens when the
3350 	 number of iterations of the loop is unknown at compile time;
3351 	 otherwise this won't be vectorized.  */
3352 if (skip_vector)
3354 	  /* An additional epilogue iteration is peeled if a gap exists.  */
3355 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
3356 bound_prolog, bound_epilog,
3357 th, &bound_scalar,
3358 check_profitability);
3359 /* Build guard against NITERSM1 since NITERS may overflow. */
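	  /* (NITERS equals NITERSM1 + 1, so it can wrap to zero when the loop
	     runs for the maximum count representable in its type; NITERSM1
	     itself cannot.)  */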
3360 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
3361 guard_bb = anchor;
3362 guard_to = split_edge (loop_preheader_edge (epilog));
3363 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3364 guard_to, guard_bb,
3365 prob_vector.invert (),
3366 irred_flag);
3367 skip_e = guard_e;
3368 e = EDGE_PRED (guard_to, 0);
3369 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3370 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3372 /* Simply propagate profile info from guard_bb to guard_to which is
3373 a merge point of control flow. */
3374 guard_to->count = guard_bb->count;
3376 /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
3377 if (vect_epilogues || scalar_loop == NULL)
3379 	      gcc_assert (epilog->num_nodes == loop->num_nodes);
3380 basic_block *bbs = get_loop_body (epilog);
3381 for (unsigned int i = 0; i < epilog->num_nodes; i++)
3383 		  gcc_assert (get_bb_original (bbs[i]) == original_bbs[i]);
3384 bbs[i]->count = original_counts[i];
3386 free (bbs);
3387 free (original_bbs);
3391 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
3392       /* If the loop is peeled a non-zero constant number of times, niters now
3393 	 refers to orig_niters - prolog_peeling; it won't overflow even if
3394 	 orig_niters overflows.  */
3395 niters_no_overflow |= (prolog_peeling > 0);
3396 vect_gen_vector_loop_niters (loop_vinfo, niters,
3397 niters_vector, step_vector,
3398 niters_no_overflow);
3399 if (!integer_onep (*step_vector))
3401 	  /* On exit from the loop we will have an easy way of calculating
3402 NITERS_VECTOR / STEP * STEP. Install a dummy definition
3403 until then. */
3404 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
3405 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
3406 *niters_vector_mult_vf_var = niters_vector_mult_vf;
3408 else
3409 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
3410 &niters_vector_mult_vf);
3411 /* Update IVs of original loop as if they were advanced by
3412 niters_vector_mult_vf steps. */
3413 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
3414 update_e = skip_vector ? e : loop_preheader_edge (epilog);
3415 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
3416 update_e);
3418 if (skip_epilog)
3420 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3421 niters, niters_vector_mult_vf);
3422 guard_bb = single_exit (loop)->dest;
3423 guard_to = split_edge (single_exit (epilog));
3424 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
3425 skip_vector ? anchor : guard_bb,
3426 prob_epilog.invert (),
3427 irred_flag);
3428 if (vect_epilogues)
3429 epilogue_vinfo->skip_this_loop_edge = guard_e;
3430 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
3431 single_exit (epilog));
3432 /* Only need to handle basic block before epilog loop if it's not
3433 the guard_bb, which is the case when skip_vector is true. */
3434 if (guard_bb != bb_before_epilog)
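	    /* A rough reading of the combined probability below: when the
	       vector loop can be skipped, the epilog runs either after the
	       vector loop (probability prob_vector * prob_epilog) or directly
	       when the vector loop is skipped (probability
	       prob_vector.invert ()).  */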
3436 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
3438 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
3440 scale_loop_profile (epilog, prob_epilog, 0);
3442 else
3443 slpeel_update_phi_nodes_for_lcssa (epilog);
3445 unsigned HOST_WIDE_INT bound;
3446 if (bound_scalar.is_constant (&bound))
3448 gcc_assert (bound != 0);
3449 /* -1 to convert loop iterations to latch iterations. */
3450 record_niter_bound (epilog, bound - 1, false, true);
3453 delete_update_ssa ();
3454 adjust_vec_debug_stmts ();
3455 scev_reset ();
3458 if (vect_epilogues)
3460 epilog->aux = epilogue_vinfo;
3461 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
3463 loop_constraint_clear (epilog, LOOP_C_INFINITE);
3465       /* We now must calculate NITERS, the number of iterations performed by the
3466 	 previous loop, and EPILOGUE_NITERS, those to be performed by the epilogue.  */
3467 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
3468 niters_prolog, niters_vector_mult_vf);
3470       /* If skip_vector we may skip the previous loop; insert a phi-node to
3471 	 determine whether we are coming from the previous vectorized loop
3472 	 using the update_e edge or from the skip_vector basic block using
3473 	 the skip_e edge.  */
3474 if (skip_vector)
3476 gcc_assert (update_e != NULL
3477 && skip_e != NULL
3478 && !vect_epilogues_updated_niters);
3479 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
3480 update_e->dest);
3481 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
3482 gimple *stmt = gimple_build_assign (new_ssa, niters);
3483 gimple_stmt_iterator gsi;
3484 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
3485 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
3487 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
3488 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
3490 else
3492 gsi = gsi_last_bb (update_e->src);
3493 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3496 niters = new_ssa;
3497 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
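	  /* Coming in via SKIP_E the vectorized path (prolog and vector loop)
	     was bypassed, so zero iterations are accounted for there.  */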
3498 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
3499 UNKNOWN_LOCATION);
3500 niters = PHI_RESULT (new_phi);
3501 epilogue_vinfo->main_loop_edge = update_e;
3502 epilogue_vinfo->skip_main_loop_edge = skip_e;
3505 /* Set ADVANCE to the number of iterations performed by the previous
3506 loop and its prologue. */
3507 *advance = niters;
3509 if (!vect_epilogues_updated_niters)
3511 /* Subtract the number of iterations performed by the vectorized loop
3512 from the number of total iterations. */
3513 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
3514 before_loop_niters,
3515 niters);
3517 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
3518 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
3519 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
3520 epilogue_niters,
3521 build_one_cst (TREE_TYPE (epilogue_niters)));
3523 /* Decide what to do if the number of epilogue iterations is not
3524 a multiple of the epilogue loop's vectorization factor.
3525 We should have rejected the loop during the analysis phase
3526 if this fails. */
3527 if (!vect_determine_partial_vectors_and_peeling (epilogue_vinfo,
3528 true))
3529 gcc_unreachable ();
3533 adjust_vec.release ();
3534 free_original_copy_tables ();
3536 return vect_epilogues ? epilog : NULL;
3539 /* Function vect_create_cond_for_niters_checks.
3541 Create a conditional expression that represents the run-time checks for
3542 loop's niter. The loop is guaranteed to terminate if the run-time
3543 checks hold.
3545    Input:
3546    COND_EXPR  - input conditional expression.  New conditions will be chained
3547                 with logical AND operation.  If it is NULL, it is set to the
3548                 new condition.
3549    LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
3550                 assumptions to be checked.
3552 Output:
3553 COND_EXPR - conditional expression.
3555 The returned COND_EXPR is the conditional expression to be used in the
3556 if statement that controls which version of the loop gets executed at
3557 runtime. */
3559 static void
3560 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3562 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
3564 if (*cond_expr)
3565 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3566 *cond_expr, part_cond_expr);
3567 else
3568 *cond_expr = part_cond_expr;
3571 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3572 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
3574 static void
3575 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
3577 if (*cond_expr)
3578 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3579 *cond_expr, part_cond_expr);
3580 else
3581 *cond_expr = part_cond_expr;
3584 /* Function vect_create_cond_for_align_checks.
3586 Create a conditional expression that represents the alignment checks for
3587 all of data references (array element references) whose alignment must be
3588 checked at runtime.
3590 Input:
3591 COND_EXPR - input conditional expression. New conditions will be chained
3592 with logical AND operation.
3593 LOOP_VINFO - two fields of the loop information are used.
3594 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3595 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3597 Output:
3598 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3599 expression.
3600 The returned value is the conditional expression to be used in the if
3601 statement that controls which version of the loop gets executed at runtime.
3603 The algorithm makes two assumptions:
3604 1) The number of bytes "n" in a vector is a power of 2.
3605    2) An address "a" is aligned if a%n is zero, and this
3606       test can be done as a&(n-1) == 0.  For example, for 16
3607       byte vectors the test is a&0xf == 0.  */
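   /* As an illustrative sketch (not emitted verbatim), for two data
      references DR_1 and DR_2 and 16-byte vectors the generated sequence is
      roughly:

        addr2int0 = (signed ptrsize) &DR_1_first_vector;
        addr2int1 = (signed ptrsize) &DR_2_first_vector;
        orptrs1   = addr2int0 | addr2int1;
        andmask   = orptrs1 & 0xf;
        cond      = (andmask == 0);

      so a single AND against the mask tests all references at once.  */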
3609 static void
3610 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3611 tree *cond_expr,
3612 gimple_seq *cond_expr_stmt_list)
3614 const vec<stmt_vec_info> &may_misalign_stmts
3615 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3616 stmt_vec_info stmt_info;
3617 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3618 tree mask_cst;
3619 unsigned int i;
3620 tree int_ptrsize_type;
3621 char tmp_name[20];
3622 tree or_tmp_name = NULL_TREE;
3623 tree and_tmp_name;
3624 gimple *and_stmt;
3625 tree ptrsize_zero;
3626 tree part_cond_expr;
3628 /* Check that mask is one less than a power of 2, i.e., mask is
3629 all zeros followed by all ones. */
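   /* For example, mask == 0xf qualifies since 0xf & 0x10 == 0, whereas
      mask == 0xe does not since 0xe & 0xf == 0xe.  */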
3630 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
3632 int_ptrsize_type = signed_type_for (ptr_type_node);
3634   /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
3635      of the first vector of the i'th data reference.  */
3637 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3639 gimple_seq new_stmt_list = NULL;
3640 tree addr_base;
3641 tree addr_tmp_name;
3642 tree new_or_tmp_name;
3643 gimple *addr_stmt, *or_stmt;
3644 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3645 bool negative = tree_int_cst_compare
3646 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
3647 tree offset = negative
3648 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
3649 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
3650 : size_zero_node;
3652 /* create: addr_tmp = (int)(address_of_first_vector) */
3653 addr_base =
3654 vect_create_addr_base_for_vector_ref (loop_vinfo,
3655 stmt_info, &new_stmt_list,
3656 offset);
3657 if (new_stmt_list != NULL)
3658 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3660 sprintf (tmp_name, "addr2int%d", i);
3661 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3662 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3663 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3665       /* The addresses are ORed together.  */
3667 if (or_tmp_name != NULL_TREE)
3669 /* create: or_tmp = or_tmp | addr_tmp */
3670 sprintf (tmp_name, "orptrs%d", i);
3671 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3672 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3673 or_tmp_name, addr_tmp_name);
3674 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3675 or_tmp_name = new_or_tmp_name;
3677 else
3678 or_tmp_name = addr_tmp_name;
3680 } /* end for i */
3682 mask_cst = build_int_cst (int_ptrsize_type, mask);
3684 /* create: and_tmp = or_tmp & mask */
3685 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3687 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3688 or_tmp_name, mask_cst);
3689 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3691   /* Make and_tmp the left operand of the conditional test against zero.
3692      If and_tmp has a nonzero bit then some address is unaligned.  */
3693 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3694 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
3695 and_tmp_name, ptrsize_zero);
3696 chain_cond_expr (cond_expr, part_cond_expr);
3699 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
3700 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
3701 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3702 and this new condition are true. Treat a null *COND_EXPR as "true". */
3704 static void
3705 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
3707 const vec<vec_object_pair> &pairs
3708 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3709 unsigned int i;
3710 vec_object_pair *pair;
3711 FOR_EACH_VEC_ELT (pairs, i, pair)
3713 tree addr1 = build_fold_addr_expr (pair->first);
3714 tree addr2 = build_fold_addr_expr (pair->second);
3715 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
3716 addr1, addr2);
3717 chain_cond_expr (cond_expr, part_cond_expr);
3721 /* Create an expression that is true when all lower-bound conditions for
3722 the vectorized loop are met. Chain this condition with *COND_EXPR. */
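   /* Sketching the arithmetic used below: for a possibly-signed EXPR the
      requirement |EXPR| >= BOUND is, in effect, checked as the unsigned
      comparison EXPR + (BOUND - 1) >= 2 * BOUND - 1, which (ignoring wrap at
      the extremes of the type) holds when EXPR >= BOUND or EXPR <= -BOUND.  */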
3724 static void
3725 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
3727 const vec<vec_lower_bound> &lower_bounds
3728 = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3729 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3731 tree expr = lower_bounds[i].expr;
3732 tree type = unsigned_type_for (TREE_TYPE (expr));
3733 expr = fold_convert (type, expr);
3734 poly_uint64 bound = lower_bounds[i].min_value;
3735 if (!lower_bounds[i].unsigned_p)
3737 expr = fold_build2 (PLUS_EXPR, type, expr,
3738 build_int_cstu (type, bound - 1));
3739 bound += bound - 1;
3741 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
3742 build_int_cstu (type, bound));
3743 chain_cond_expr (cond_expr, part_cond_expr);
3747 /* Function vect_create_cond_for_alias_checks.
3749 Create a conditional expression that represents the run-time checks for
3750 overlapping of address ranges represented by a list of data references
3751 relations passed as input.
3753 Input:
3754 COND_EXPR - input conditional expression. New conditions will be chained
3755 with logical AND operation. If it is NULL, then the function
3756 is used to return the number of alias checks.
3757 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
3758 to be checked.
3760 Output:
3761 COND_EXPR - conditional expression.
3763 The returned COND_EXPR is the conditional expression to be used in the if
3764 statement that controls which version of the loop gets executed at runtime.
3767 void
3768 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
3770 const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
3771 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3773 if (comp_alias_ddrs.is_empty ())
3774 return;
3776 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
3777 &comp_alias_ddrs, cond_expr);
3778 if (dump_enabled_p ())
3779 dump_printf_loc (MSG_NOTE, vect_location,
3780 "created %u versioning for alias checks.\n",
3781 comp_alias_ddrs.length ());
3785 /* Function vect_loop_versioning.
3787    If the loop has data references that may or may not be aligned and/or
3788 has data reference relations whose independence was not proven then
3789 two versions of the loop need to be generated, one which is vectorized
3790 and one which isn't. A test is then generated to control which of the
3791 loops is executed. The test checks for the alignment of all of the
3792 data references that may or may not be aligned. An additional
3793    sequence of runtime tests is generated for each pair of DDRs whose
3794 independence was not proven. The vectorized version of loop is
3795 executed only if both alias and alignment tests are passed.
3797 The test generated to check which version of loop is executed
3798 is modified to also check for profitability as indicated by the
3799 cost model threshold TH.
3801 The versioning precondition(s) are placed in *COND_EXPR and
3802 *COND_EXPR_STMT_LIST. */
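   /* As a rough sketch, the precondition built below is a conjunction of
      (up to) the iteration-count/profitability threshold check, the niter
      assumptions, the alignment checks, the unequal-address, lower-bound and
      alias checks, and the simd-if condition; each conjunct is present only
      when the corresponding kind of versioning is required.  */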
3804 class loop *
3805 vect_loop_versioning (loop_vec_info loop_vinfo,
3806 gimple *loop_vectorized_call)
3808 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
3809 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3810 basic_block condition_bb;
3811 gphi_iterator gsi;
3812 gimple_stmt_iterator cond_exp_gsi;
3813 basic_block merge_bb;
3814 basic_block new_exit_bb;
3815 edge new_exit_e, e;
3816 gphi *orig_phi, *new_phi;
3817 tree cond_expr = NULL_TREE;
3818 gimple_seq cond_expr_stmt_list = NULL;
3819 tree arg;
3820 profile_probability prob = profile_probability::likely ();
3821 gimple_seq gimplify_stmt_list = NULL;
3822 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
3823 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3824 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
3825 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
3826 poly_uint64 versioning_threshold
3827 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3828 tree version_simd_if_cond
3829 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
3830 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
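  /* Note scalar_loop_iters is LOOP_VINFO_NITERSM1, i.e. the latch iteration
     count, so the comparison against TH - 1 below in effect checks
     niters >= TH.  */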
3832 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
3833 && !ordered_p (th, versioning_threshold))
3834 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3835 build_int_cst (TREE_TYPE (scalar_loop_iters),
3836 th - 1));
3837 if (maybe_ne (versioning_threshold, 0U))
3839 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3840 build_int_cst (TREE_TYPE (scalar_loop_iters),
3841 versioning_threshold - 1));
3842 if (cond_expr)
3843 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3844 expr, cond_expr);
3845 else
3846 cond_expr = expr;
3849 tree cost_name = NULL_TREE;
3850 profile_probability prob2 = profile_probability::uninitialized ();
3851 if (cond_expr
3852 && EXPR_P (cond_expr)
3853 && (version_niter
3854 || version_align
3855 || version_alias
3856 || version_simd_if_cond))
3858 cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3859 &cond_expr_stmt_list,
3860 is_gimple_val, NULL_TREE);
3861      /* Split prob into two so that the overall probability of passing
3862 	 both the cost-model and versioning checks is the original prob.  */
3863 prob2 = prob.split (prob);
3866 if (version_niter)
3867 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3869 if (cond_expr)
3871 gimple_seq tem = NULL;
3872 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3873 &tem, is_gimple_condexpr_for_cond,
3874 NULL_TREE);
3875 gimple_seq_add_seq (&cond_expr_stmt_list, tem);
3878 if (version_align)
3879 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3880 &cond_expr_stmt_list);
3882 if (version_alias)
3884 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
3885 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
3886 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3889 if (version_simd_if_cond)
3891 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3892 if (flag_checking)
3893 if (basic_block bb
3894 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3895 gcc_assert (bb != loop->header
3896 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3897 && (scalar_loop == NULL
3898 || (bb != scalar_loop->header
3899 && dominated_by_p (CDI_DOMINATORS,
3900 scalar_loop->header, bb))));
3901 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3902 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3903 version_simd_if_cond, zero);
3904 if (cond_expr)
3905 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3906 c, cond_expr);
3907 else
3908 cond_expr = c;
3909 if (dump_enabled_p ())
3910 dump_printf_loc (MSG_NOTE, vect_location,
3911 "created versioning for simd if condition check.\n");
3914 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3915 &gimplify_stmt_list,
3916 is_gimple_condexpr_for_cond, NULL_TREE);
3917 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
3919 /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
3920 invariant in. */
3921 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
3922 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3923 !gsi_end_p (gsi); gsi_next (&gsi))
3925 gimple *stmt = gsi_stmt (gsi);
3926 update_stmt (stmt);
3927 ssa_op_iter iter;
3928 use_operand_p use_p;
3929 basic_block def_bb;
3930 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3931 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3932 && flow_bb_inside_loop_p (outermost, def_bb))
3933 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3936 /* Search for the outermost loop we can version. Avoid versioning of
3937 non-perfect nests but allow if-conversion versioned loops inside. */
3938 class loop *loop_to_version = loop;
3939 if (flow_loop_nested_p (outermost, loop))
3941 if (dump_enabled_p ())
3942 dump_printf_loc (MSG_NOTE, vect_location,
3943 "trying to apply versioning to outer loop %d\n",
3944 outermost->num);
3945 if (outermost->num == 0)
3946 outermost = superloop_at_depth (loop, 1);
3947 /* And avoid applying versioning on non-perfect nests. */
3948 while (loop_to_version != outermost
3949 && (e = single_exit (loop_outer (loop_to_version)))
3950 && !(e->flags & EDGE_COMPLEX)
3951 && (!loop_outer (loop_to_version)->inner->next
3952 || vect_loop_vectorized_call (loop_to_version))
3953 && (!loop_outer (loop_to_version)->inner->next
3954 || !loop_outer (loop_to_version)->inner->next->next))
3955 loop_to_version = loop_outer (loop_to_version);
3958 /* Apply versioning. If there is already a scalar version created by
3959 if-conversion re-use that. Note we cannot re-use the copy of
3960 an if-converted outer-loop when vectorizing the inner loop only. */
3961 gcond *cond;
3962 if ((!loop_to_version->inner || loop == loop_to_version)
3963 && loop_vectorized_call)
3965 gcc_assert (scalar_loop);
3966 condition_bb = gimple_bb (loop_vectorized_call);
3967 cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
3968 gimple_cond_set_condition_from_tree (cond, cond_expr);
3969 update_stmt (cond);
3971 if (cond_expr_stmt_list)
3973 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
3974 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3975 GSI_SAME_STMT);
3978       /* If-conversion uses profile_probability::always () for both paths,
3979 	 so reset the paths' probabilities appropriately.  */
3980 edge te, fe;
3981 extract_true_false_edges_from_block (condition_bb, &te, &fe);
3982 te->probability = prob;
3983 fe->probability = prob.invert ();
3984       /* We can scale the loop counts immediately but have to postpone
3985 	 scaling the scalar loop because we re-use it during peeling.  */
3986 scale_loop_frequencies (loop_to_version, te->probability);
3987 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
3989 nloop = scalar_loop;
3990 if (dump_enabled_p ())
3991 dump_printf_loc (MSG_NOTE, vect_location,
3992 "reusing %sloop version created by if conversion\n",
3993 loop_to_version != loop ? "outer " : "");
3995 else
3997 if (loop_to_version != loop
3998 && dump_enabled_p ())
3999 dump_printf_loc (MSG_NOTE, vect_location,
4000 "applying loop versioning to outer loop %d\n",
4001 loop_to_version->num);
4003 unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
4005 initialize_original_copy_tables ();
4006 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
4007 prob, prob.invert (), prob, prob.invert (), true);
4008 gcc_assert (nloop);
4009 nloop = get_loop_copy (loop);
4011 /* For cycle vectorization with SLP we rely on the PHI arguments
4012 appearing in the same order as the SLP node operands which for the
4013 loop PHI nodes means the preheader edge dest index needs to remain
4014 the same for the analyzed loop which also becomes the vectorized one.
4015 	 Make it so in case the state after versioning differs, by redirecting
4016 	 the first edge into the header to the same destination, which moves
4017 	 it last.  */
4018 if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
4020 edge e = EDGE_PRED (loop->header, 0);
4021 ssa_redirect_edge (e, e->dest);
4022 flush_pending_stmts (e);
4024 gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
4026       /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy; nobody will
4027 	 reap those otherwise, and they also refer to the original
4028 	 loops.  */
4029 class loop *l = loop;
4030 while (gimple *call = vect_loop_vectorized_call (l))
4032 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
4033 fold_loop_internal_call (call, boolean_false_node);
4034 l = loop_outer (l);
4036 free_original_copy_tables ();
4038 if (cond_expr_stmt_list)
4040 cond_exp_gsi = gsi_last_bb (condition_bb);
4041 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4042 GSI_SAME_STMT);
4045 /* Loop versioning violates an assumption we try to maintain during
4046 vectorization - that the loop exit block has a single predecessor.
4047 After versioning, the exit block of both loop versions is the same
4048 basic block (i.e. it has two predecessors). Just in order to simplify
4049 following transformations in the vectorizer, we fix this situation
4050 here by adding a new (empty) block on the exit-edge of the loop,
4051 with the proper loop-exit phis to maintain loop-closed-form.
4052      If loop versioning wasn't done from loop, but scalar_loop instead,
4053      merge_bb will already have just a single predecessor.  */
4055 merge_bb = single_exit (loop_to_version)->dest;
4056 if (EDGE_COUNT (merge_bb->preds) >= 2)
4058 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
4059 new_exit_bb = split_edge (single_exit (loop_to_version));
4060 new_exit_e = single_exit (loop_to_version);
4061 e = EDGE_SUCC (new_exit_bb, 0);
4063 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
4064 gsi_next (&gsi))
4066 tree new_res;
4067 orig_phi = gsi.phi ();
4068 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
4069 new_phi = create_phi_node (new_res, new_exit_bb);
4070 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4071 add_phi_arg (new_phi, arg, new_exit_e,
4072 gimple_phi_arg_location_from_edge (orig_phi, e));
4073 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
4077 update_ssa (TODO_update_ssa_no_phi);
4080 /* Split the cost model check off to a separate BB. Costing assumes
4081 this is the only thing we perform when we enter the scalar loop
4082 from a failed cost decision. */
4083 if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
4085 gimple *def = SSA_NAME_DEF_STMT (cost_name);
4086 gcc_assert (gimple_bb (def) == condition_bb);
4087 /* All uses of the cost check are 'true' after the check we
4088 are going to insert. */
4089 replace_uses_by (cost_name, boolean_true_node);
4090 /* And we're going to build the new single use of it. */
4091 gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
4092 NULL_TREE, NULL_TREE);
4093 edge e = split_block (gimple_bb (def), def);
4094 gimple_stmt_iterator gsi = gsi_for_stmt (def);
4095 gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
4096 edge true_e, false_e;
4097 extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
4098 e->flags &= ~EDGE_FALLTHRU;
4099 e->flags |= EDGE_TRUE_VALUE;
4100 edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
4101 e->probability = prob2;
4102 e2->probability = prob2.invert ();
4103 set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
4104 auto_vec<basic_block, 3> adj;
4105 for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
4106 son;
4107 son = next_dom_son (CDI_DOMINATORS, son))
4108 if (EDGE_COUNT (son->preds) > 1)
4109 adj.safe_push (son);
4110 for (auto son : adj)
4111 set_immediate_dominator (CDI_DOMINATORS, son, e->src);
4114 if (version_niter)
4116       /* The versioned loop could be infinite; we need to clear existing
4117 	 niter information, which is copied from the original loop.  */
4118 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
4119 vect_free_loop_info_assumptions (nloop);
4122 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
4123 && dump_enabled_p ())
4125 if (version_alias)
4126 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4127 vect_location,
4128 "loop versioned for vectorization because of "
4129 "possible aliasing\n");
4130 if (version_align)
4131 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4132 vect_location,
4133 "loop versioned for vectorization to enhance "
4134 "alignment\n");
4138 return nloop;