gcc/tree-vect-loop-manip.cc
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "internal-fn.h"
47 #include "stor-layout.h"
48 #include "optabs-query.h"
49 #include "vec-perm-indices.h"
50 #include "insn-config.h"
51 #include "rtl.h"
52 #include "recog.h"
53 #include "langhooks.h"
54 #include "tree-vector-builder.h"
55 #include "optabs-tree.h"
57 /*************************************************************************
58 Simple Loop Peeling Utilities
60 Utilities to support loop peeling for vectorization purposes.
61 *************************************************************************/
64 /* Renames the use *OP_P. */
66 static void
67 rename_use_op (use_operand_p op_p)
69 tree new_name;
71 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
72 return;
74 new_name = get_current_def (USE_FROM_PTR (op_p));
76 /* Something defined outside of the loop. */
77 if (!new_name)
78 return;
80 /* An ordinary ssa name defined in the loop. */
82 SET_USE (op_p, new_name);
86 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
87 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
88 true. */
90 static void
91 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
93 gimple *stmt;
94 use_operand_p use_p;
95 ssa_op_iter iter;
96 edge e;
97 edge_iterator ei;
98 class loop *loop = bb->loop_father;
99 class loop *outer_loop = NULL;
101 if (rename_from_outer_loop)
103 gcc_assert (loop);
104 outer_loop = loop_outer (loop);
107 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
108 gsi_next (&gsi))
110 stmt = gsi_stmt (gsi);
111 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
112 rename_use_op (use_p);
115 FOR_EACH_EDGE (e, ei, bb->preds)
117 if (!flow_bb_inside_loop_p (loop, e->src))
119 if (!rename_from_outer_loop)
120 continue;
121 if (e->src != outer_loop->header)
123 if (outer_loop->inner->next)
125 /* If outer_loop has 2 inner loops, allow there to
126 be an extra basic block which decides which of the
127 two loops to use using LOOP_VECTORIZED. */
128 if (!single_pred_p (e->src)
129 || single_pred (e->src) != outer_loop->header)
130 continue;
134 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
135 gsi_next (&gsi))
136 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
141 struct adjust_info
143 tree from, to;
144 basic_block bb;
147 /* A stack of values to be adjusted in debug stmts. We have to
148 process them LIFO, so that the closest substitution applies. If we
149 processed them FIFO, without the stack, we might substitute uses
150 with a PHI DEF that would soon become non-dominant, and when we got
151 to the suitable one, it wouldn't have anything to substitute any
152 more. */
153 static vec<adjust_info, va_heap> adjust_vec;
155 /* Adjust any debug stmts that referenced AI->from values to use the
156 loop-closed AI->to, if the references are dominated by AI->bb and
157 not by the definition of AI->from. */
159 static void
160 adjust_debug_stmts_now (adjust_info *ai)
162 basic_block bbphi = ai->bb;
163 tree orig_def = ai->from;
164 tree new_def = ai->to;
165 imm_use_iterator imm_iter;
166 gimple *stmt;
167 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
169 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
171 /* Adjust any debug stmts that held onto non-loop-closed
172 references. */
173 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
175 use_operand_p use_p;
176 basic_block bbuse;
178 if (!is_gimple_debug (stmt))
179 continue;
181 gcc_assert (gimple_debug_bind_p (stmt));
183 bbuse = gimple_bb (stmt);
185 if ((bbuse == bbphi
186 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
187 && !(bbuse == bbdef
188 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
190 if (new_def)
191 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
192 SET_USE (use_p, new_def);
193 else
195 gimple_debug_bind_reset_value (stmt);
196 update_stmt (stmt);
202 /* Adjust debug stmts as scheduled before. */
204 static void
205 adjust_vec_debug_stmts (void)
207 if (!MAY_HAVE_DEBUG_BIND_STMTS)
208 return;
210 gcc_assert (adjust_vec.exists ());
212 while (!adjust_vec.is_empty ())
214 adjust_debug_stmts_now (&adjust_vec.last ());
215 adjust_vec.pop ();
219 /* Adjust any debug stmts that referenced FROM values to use the
220 loop-closed TO, if the references are dominated by BB and not by
221 the definition of FROM. If adjust_vec is non-NULL, adjustments
222 will be postponed until adjust_vec_debug_stmts is called. */
224 static void
225 adjust_debug_stmts (tree from, tree to, basic_block bb)
227 adjust_info ai;
229 if (MAY_HAVE_DEBUG_BIND_STMTS
230 && TREE_CODE (from) == SSA_NAME
231 && ! SSA_NAME_IS_DEFAULT_DEF (from)
232 && ! virtual_operand_p (from))
234 ai.from = from;
235 ai.to = to;
236 ai.bb = bb;
238 if (adjust_vec.exists ())
239 adjust_vec.safe_push (ai);
240 else
241 adjust_debug_stmts_now (&ai);
245 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
246 to adjust any debug stmts that referenced the old phi arg,
247 presumably non-loop-closed references left over from other
248 transformations. */
250 static void
251 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
253 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
255 gcc_assert (TREE_CODE (orig_def) != SSA_NAME
256 || orig_def != new_def);
258 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
260 if (MAY_HAVE_DEBUG_BIND_STMTS)
261 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
262 gimple_bb (update_phi));
265 /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
266 that the control should have during the first iteration and NEXT_CTRL is the
267 value that it should have on subsequent iterations. */
269 static void
270 vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
271 tree next_ctrl)
273 gphi *phi = create_phi_node (ctrl, loop->header);
274 add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
275 add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
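/* Illustrative sketch (not from the original sources): for a mask rgroup
   the function above leaves a header PHI of the form

     mask_1 = PHI <init_mask (preheader), next_mask (latch)>

   so the first vector iteration uses INIT_CTRL and every later iteration
   uses the NEXT_CTRL value computed near the exit test.  */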
278 /* Add SEQ to the end of LOOP's preheader block. */
280 static void
281 add_preheader_seq (class loop *loop, gimple_seq seq)
283 if (seq)
285 edge pe = loop_preheader_edge (loop);
286 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
287 gcc_assert (!new_bb);
291 /* Add SEQ to the beginning of LOOP's header block. */
293 static void
294 add_header_seq (class loop *loop, gimple_seq seq)
296 if (seq)
298 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
299 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
303 /* Return true if the target can interleave elements of two vectors.
304 OFFSET is 0 if the first half of the vectors should be interleaved
305 or 1 if the second half should. When returning true, store the
306 associated permutation in INDICES. */
308 static bool
309 interleave_supported_p (vec_perm_indices *indices, tree vectype,
310 unsigned int offset)
312 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
313 poly_uint64 base = exact_div (nelts, 2) * offset;
314 vec_perm_builder sel (nelts, 2, 3);
315 for (unsigned int i = 0; i < 3; ++i)
317 sel.quick_push (base + i);
318 sel.quick_push (base + i + nelts);
320 indices->new_vector (sel, 2, nelts);
321 return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
322 *indices);
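/* Worked instance for the permutation above (values assumed): with 8
   elements and OFFSET == 0 the encoded series expands to
   { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. the low-half interleave of the two
   inputs; OFFSET == 1 starts at BASE == 4 and gives
   { 4, 12, 5, 13, 6, 14, 7, 15 } for the high halves.  */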
325 /* Try to use permutes to define the masks in DEST_RGM using the masks
326 in SRC_RGM, given that the former has twice as many masks as the
327 latter. Return true on success, adding any new statements to SEQ. */
329 static bool
330 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
331 rgroup_controls *src_rgm)
333 tree src_masktype = src_rgm->type;
334 tree dest_masktype = dest_rgm->type;
335 machine_mode src_mode = TYPE_MODE (src_masktype);
336 insn_code icode1, icode2;
337 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
338 && (icode1 = optab_handler (vec_unpacku_hi_optab,
339 src_mode)) != CODE_FOR_nothing
340 && (icode2 = optab_handler (vec_unpacku_lo_optab,
341 src_mode)) != CODE_FOR_nothing)
343 /* Unpacking the source masks gives at least as many mask bits as
344 we need. We can then VIEW_CONVERT any excess bits away. */
345 machine_mode dest_mode = insn_data[icode1].operand[0].mode;
346 gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
347 tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
348 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
350 tree src = src_rgm->controls[i / 2];
351 tree dest = dest_rgm->controls[i];
352 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
353 ? VEC_UNPACK_HI_EXPR
354 : VEC_UNPACK_LO_EXPR);
355 gassign *stmt;
356 if (dest_masktype == unpack_masktype)
357 stmt = gimple_build_assign (dest, code, src);
358 else
360 tree temp = make_ssa_name (unpack_masktype);
361 stmt = gimple_build_assign (temp, code, src);
362 gimple_seq_add_stmt (seq, stmt);
363 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
364 build1 (VIEW_CONVERT_EXPR,
365 dest_masktype, temp));
367 gimple_seq_add_stmt (seq, stmt);
369 return true;
371 vec_perm_indices indices[2];
372 if (dest_masktype == src_masktype
373 && interleave_supported_p (&indices[0], src_masktype, 0)
374 && interleave_supported_p (&indices[1], src_masktype, 1))
376 /* The destination requires twice as many mask bits as the source, so
377 we can use interleaving permutes to double up the number of bits. */
378 tree masks[2];
379 for (unsigned int i = 0; i < 2; ++i)
380 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
381 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
383 tree src = src_rgm->controls[i / 2];
384 tree dest = dest_rgm->controls[i];
385 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
386 src, src, masks[i & 1]);
387 gimple_seq_add_stmt (seq, stmt);
389 return true;
391 return false;
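/* Illustrative effect of the interleave path above (sizes assumed): if
   SRC_RGM has a single 8-element mask m and DEST_RGM needs two masks of
   the same type, the permutes produce

     dest0 = VEC_PERM <m, m, { 0, 8, 1, 9, 2, 10, 3, 11 }>
	   = { m0, m0, m1, m1, m2, m2, m3, m3 }
     dest1 = VEC_PERM <m, m, { 4, 12, 5, 13, 6, 14, 7, 15 }>
	   = { m4, m4, m5, m5, m6, m6, m7, m7 }

   so each source mask bit is duplicated into two adjacent destination
   bits.  */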
394 /* Populate DEST_RGM->controls, given that they should add up to STEP.
396 STEP = MIN_EXPR <ivtmp_34, VF>;
398 First length (MIN (X, VF/N)):
399 loop_len_15 = MIN_EXPR <STEP, VF/N>;
401 Second length:
402 tmp = STEP - loop_len_15;
403 loop_len_16 = MIN (tmp, VF/N);
405 Third length:
406 tmp2 = tmp - loop_len_16;
407 loop_len_17 = MIN (tmp2, VF/N);
409 Last length:
410 loop_len_18 = tmp2 - loop_len_17;
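/* Numeric illustration (values assumed, not from the original comment):
   with VF = 16, four controls (so VF/N = 4) and STEP = 13 the code below
   produces 4, 4, 4 and finally 13 - 4 - 4 - 4 = 1, which sums back to
   STEP.  */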
413 static void
414 vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
415 rgroup_controls *dest_rgm, tree step)
417 tree ctrl_type = dest_rgm->type;
418 poly_uint64 nitems_per_ctrl
419 = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
420 tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
422 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
424 tree ctrl = dest_rgm->controls[i];
425 if (i == 0)
427 /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */
428 gassign *assign
429 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
430 gimple_seq_add_stmt (seq, assign);
432 else if (i == dest_rgm->controls.length () - 1)
434 /* Last iteration: Remain capped to the range [0, VF/N]. */
435 gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
436 dest_rgm->controls[i - 1]);
437 gimple_seq_add_stmt (seq, assign);
439 else
441 /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */
442 step = gimple_build (seq, MINUS_EXPR, iv_type, step,
443 dest_rgm->controls[i - 1]);
444 gassign *assign
445 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
446 gimple_seq_add_stmt (seq, assign);
451 /* Stores to BSI the standard position for the induction variable increment
452    belonging to LOOP_EXIT (just before the exit condition of the given exit).
453    INSERT_AFTER is set to true if the increment should be inserted after
454    *BSI.  */
456 void
457 vect_iv_increment_position (edge loop_exit, gimple_stmt_iterator *bsi,
458 bool *insert_after)
460 basic_block bb = loop_exit->src;
461 *bsi = gsi_last_bb (bb);
462 *insert_after = false;
465 /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
466 for all the rgroup controls in RGC and return a control that is nonzero
467 when the loop needs to iterate. Add any new preheader statements to
468 PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
470 RGC belongs to loop LOOP. The loop originally iterated NITERS
471 times and has been vectorized according to LOOP_VINFO.
473 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
474 starts with NITERS_SKIP dummy iterations of the scalar loop before
475 the real work starts. The mask elements for these dummy iterations
476 must be 0, to ensure that the extra iterations do not have an effect.
478 It is known that:
480 NITERS * RGC->max_nscalars_per_iter * RGC->factor
482 does not overflow. However, MIGHT_WRAP_P says whether an induction
483 variable that starts at 0 and has step:
485 VF * RGC->max_nscalars_per_iter * RGC->factor
487 might overflow before hitting a value above:
489 (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
491 This means that we cannot guarantee that such an induction variable
492 would ever hit a value that produces a set of all-false masks or zero
493 lengths for RGC.
495 Note: the cost of the code generated by this function is modeled
496 by vect_estimate_min_profitable_iters, so changes here may need
497 corresponding changes there. */
499 static tree
500 vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
501 gimple_seq *preheader_seq,
502 gimple_seq *header_seq,
503 gimple_stmt_iterator loop_cond_gsi,
504 rgroup_controls *rgc, tree niters,
505 tree niters_skip, bool might_wrap_p,
506 tree *iv_step, tree *compare_step)
508 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
509 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
510 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
512 tree ctrl_type = rgc->type;
513 unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
514 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
515 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
516 tree length_limit = NULL_TREE;
517 /* For lengths, we need length_limit to keep each length in range. */
518 if (!use_masks_p)
519 length_limit = build_int_cst (compare_type, nitems_per_ctrl);
521 /* Calculate the maximum number of item values that the rgroup
522 handles in total, the number that it handles for each iteration
523 of the vector loop, and the number that it should skip during the
524 first iteration of the vector loop. */
525 tree nitems_total = niters;
526 tree nitems_step = build_int_cst (iv_type, vf);
527 tree nitems_skip = niters_skip;
528 if (nitems_per_iter != 1)
530 /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
531 these multiplications don't overflow. */
532 tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
533 tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
534 nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
535 nitems_total, compare_factor);
536 nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
537 nitems_step, iv_factor);
538 if (nitems_skip)
539 nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
540 nitems_skip, compare_factor);
543 /* Create an induction variable that counts the number of items
544 processed. */
545 tree index_before_incr, index_after_incr;
546 gimple_stmt_iterator incr_gsi;
547 bool insert_after;
548 edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
549 vect_iv_increment_position (exit_e, &incr_gsi, &insert_after);
550 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
552 /* Create an IV that counts down from niters_total and whose step
553 is the (variable) amount processed in the current iteration:
555 _10 = (unsigned long) count_12(D);
557 # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
558 _36 = (MIN_EXPR | SELECT_VL) <ivtmp_9, POLY_INT_CST [4, 4]>;
560 vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
562 ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
564 if (ivtmp_9 > POLY_INT_CST [4, 4])
565 goto <bb 4>; [83.33%]
566 else
567 goto <bb 5>; [16.67%]
569 nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
570 tree step = rgc->controls.length () == 1 ? rgc->controls[0]
571 : make_ssa_name (iv_type);
572 /* Create decrement IV. */
573 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
575 create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
576 insert_after, &index_before_incr, &index_after_incr);
577 tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
578 index_before_incr, nitems_step);
579 gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
581 else
583 create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
584 &incr_gsi, insert_after, &index_before_incr,
585 &index_after_incr);
586 gimple_seq_add_stmt (header_seq,
587 gimple_build_assign (step, MIN_EXPR,
588 index_before_incr,
589 nitems_step));
591 *iv_step = step;
592 *compare_step = nitems_step;
593 return LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? index_after_incr
594 : index_before_incr;
597 /* Create increment IV. */
598 create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
599 loop, &incr_gsi, insert_after, &index_before_incr,
600 &index_after_incr);
602 tree zero_index = build_int_cst (compare_type, 0);
603 tree test_index, test_limit, first_limit;
604 gimple_stmt_iterator *test_gsi;
605 if (might_wrap_p)
607 /* In principle the loop should stop iterating once the incremented
608 IV reaches a value greater than or equal to:
610 NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
612 However, there's no guarantee that this addition doesn't overflow
613 the comparison type, or that the IV hits a value above it before
614 wrapping around. We therefore adjust the limit down by one
615 IV step:
617 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
618 -[infinite-prec] NITEMS_STEP
620 and compare the IV against this limit _before_ incrementing it.
621 Since the comparison type is unsigned, we actually want the
622 subtraction to saturate at zero:
624 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
625 -[sat] NITEMS_STEP
627 And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
629 NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
631 where the rightmost subtraction can be done directly in
632 COMPARE_TYPE. */
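	 /* Numeric illustration of the reassociation (values assumed): with
	    NITEMS_STEP = 8 and NITEMS_SKIP = 2 the code computes
	    ADJUST = 8 - 2 = 6 and TEST_LIMIT = MAX (NITEMS_TOTAL, 6) - 6,
	    which equals NITEMS_TOTAL + 2 - 8 whenever NITEMS_TOTAL >= 6,
	    without ever forming the possibly-overflowing sum
	    NITEMS_TOTAL + NITEMS_SKIP.  */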
633 test_index = index_before_incr;
634 tree adjust = gimple_convert (preheader_seq, compare_type,
635 nitems_step);
636 if (nitems_skip)
637 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
638 adjust, nitems_skip);
639 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
640 nitems_total, adjust);
641 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
642 test_limit, adjust);
643 test_gsi = &incr_gsi;
645 /* Get a safe limit for the first iteration. */
646 if (nitems_skip)
648 /* The first vector iteration can handle at most NITEMS_STEP
649 items. NITEMS_STEP <= CONST_LIMIT, and adding
650 NITEMS_SKIP to that cannot overflow. */
651 tree const_limit = build_int_cst (compare_type,
652 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
653 * nitems_per_iter);
654 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
655 nitems_total, const_limit);
656 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
657 first_limit, nitems_skip);
659 else
660 /* For the first iteration it doesn't matter whether the IV hits
661 a value above NITEMS_TOTAL. That only matters for the latch
662 condition. */
663 first_limit = nitems_total;
665 else
667 /* Test the incremented IV, which will always hit a value above
668 the bound before wrapping. */
669 test_index = index_after_incr;
670 test_limit = nitems_total;
671 if (nitems_skip)
672 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
673 test_limit, nitems_skip);
674 test_gsi = &loop_cond_gsi;
676 first_limit = test_limit;
679 /* Convert the IV value to the comparison type (either a no-op or
680 a demotion). */
681 gimple_seq test_seq = NULL;
682 test_index = gimple_convert (&test_seq, compare_type, test_index);
683 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
685 /* Provide a definition of each control in the group. */
686 tree next_ctrl = NULL_TREE;
687 tree ctrl;
688 unsigned int i;
689 FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
691 /* Previous controls will cover BIAS items. This control covers the
692 next batch. */
693 poly_uint64 bias = nitems_per_ctrl * i;
694 tree bias_tree = build_int_cst (compare_type, bias);
696 /* See whether the first iteration of the vector loop is known
697 to have a full control. */
698 poly_uint64 const_limit;
699 bool first_iteration_full
700 = (poly_int_tree_p (first_limit, &const_limit)
701 && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
703 /* Rather than have a new IV that starts at BIAS and goes up to
704 TEST_LIMIT, prefer to use the same 0-based IV for each control
705 and adjust the bound down by BIAS. */
706 tree this_test_limit = test_limit;
707 if (i != 0)
709 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
710 compare_type, this_test_limit,
711 bias_tree);
712 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
713 compare_type, this_test_limit,
714 bias_tree);
717 /* Create the initial control. First include all items that
718 are within the loop limit. */
719 tree init_ctrl = NULL_TREE;
720 if (!first_iteration_full)
722 tree start, end;
723 if (first_limit == test_limit)
725 /* Use a natural test between zero (the initial IV value)
726 and the loop limit. The "else" block would be valid too,
727 but this choice can avoid the need to load BIAS_TREE into
728 a register. */
729 start = zero_index;
730 end = this_test_limit;
732 else
734 /* FIRST_LIMIT is the maximum number of items handled by the
735 first iteration of the vector loop. Test the portion
736 associated with this control. */
737 start = bias_tree;
738 end = first_limit;
741 if (use_masks_p)
742 init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
743 start, end, "max_mask");
744 else
746 init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
747 gimple_seq seq = vect_gen_len (init_ctrl, start,
748 end, length_limit);
749 gimple_seq_add_seq (preheader_seq, seq);
753 /* Now AND out the bits that are within the number of skipped
754 items. */
755 poly_uint64 const_skip;
756 if (nitems_skip
757 && !(poly_int_tree_p (nitems_skip, &const_skip)
758 && known_le (const_skip, bias)))
760 gcc_assert (use_masks_p);
761 tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
762 bias_tree, nitems_skip);
763 if (init_ctrl)
764 init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
765 init_ctrl, unskipped_mask);
766 else
767 init_ctrl = unskipped_mask;
770 if (!init_ctrl)
772 /* First iteration is full. */
773 if (use_masks_p)
774 init_ctrl = build_minus_one_cst (ctrl_type);
775 else
776 init_ctrl = length_limit;
779 /* Get the control value for the next iteration of the loop. */
780 if (use_masks_p)
782 gimple_seq stmts = NULL;
783 next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
784 this_test_limit, "next_mask");
785 gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
787 else
789 next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
790 gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
791 length_limit);
792 gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
795 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
798 int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
799 if (partial_load_bias != 0)
801 tree adjusted_len = rgc->bias_adjusted_ctrl;
802 gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
803 rgc->controls[0],
804 build_int_cst
805 (TREE_TYPE (rgc->controls[0]),
806 partial_load_bias));
807 gimple_seq_add_stmt (header_seq, minus);
810 return next_ctrl;
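/* Illustrative summary of the code generated above for a single
   fully-masked rgroup (shapes assumed; on targets with while_ult
   patterns vect_gen_while expands to .WHILE_ULT):

     preheader:
       max_mask_1 = .WHILE_ULT (start, limit, { 0, ... });
     header:
       mask_2 = PHI <max_mask_1 (preheader), next_mask_3 (latch)>
       ...
       next_mask_3 = .WHILE_ULT (iv, limit, { 0, ... });

   The caller then compares the returned control against zero to decide
   whether to branch back.  */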
813 /* Set up the iteration condition and rgroup controls for LOOP, given
814 that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
815 loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
816 the number of iterations of the original scalar loop that should be
817 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
818 for vect_set_loop_condition.
820 Insert the branch-back condition before LOOP_COND_GSI and return the
821 final gcond. */
823 static gcond *
824 vect_set_loop_condition_partial_vectors (class loop *loop, edge exit_edge,
825 loop_vec_info loop_vinfo, tree niters,
826 tree final_iv, bool niters_maybe_zero,
827 gimple_stmt_iterator loop_cond_gsi)
829 gimple_seq preheader_seq = NULL;
830 gimple_seq header_seq = NULL;
832 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
833 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
834 unsigned int compare_precision = TYPE_PRECISION (compare_type);
835 tree orig_niters = niters;
837 /* Type of the initial value of NITERS. */
838 tree ni_actual_type = TREE_TYPE (niters);
839 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
840 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
841 if (niters_skip)
842 niters_skip = gimple_convert (&preheader_seq, compare_type, niters_skip);
844 /* Convert NITERS to the same size as the compare. */
845 if (compare_precision > ni_actual_precision
846 && niters_maybe_zero)
848 /* We know that there is always at least one iteration, so if the
849 count is zero then it must have wrapped. Cope with this by
850 subtracting 1 before the conversion and adding 1 to the result. */
851 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
852 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
853 niters, build_minus_one_cst (ni_actual_type));
854 niters = gimple_convert (&preheader_seq, compare_type, niters);
855 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
856 niters, build_one_cst (compare_type));
858 else
859 niters = gimple_convert (&preheader_seq, compare_type, niters);
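  /* Illustrative case for the conversion above (widths assumed): if NITERS
     is an 8-bit value of 0 that stands for 256 iterations and COMPARE_TYPE
     is 32 bits wide, the sequence computes (0 + 255) = 255 in 8 bits,
     widens it to 255 and adds 1, recovering 256.  */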
861 /* Iterate over all the rgroups and fill in their controls. We could use
862 the first control from any rgroup for the loop condition; here we
863 arbitrarily pick the last. */
864 tree test_ctrl = NULL_TREE;
865 tree iv_step = NULL_TREE;
866 tree compare_step = NULL_TREE;
867 rgroup_controls *rgc;
868 rgroup_controls *iv_rgc = nullptr;
869 unsigned int i;
870 auto_vec<rgroup_controls> *controls = use_masks_p
871 ? &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec
872 : &LOOP_VINFO_LENS (loop_vinfo);
873 FOR_EACH_VEC_ELT (*controls, i, rgc)
874 if (!rgc->controls.is_empty ())
876 /* First try using permutes. This adds a single vector
877 instruction to the loop for each mask, but needs no extra
878 loop invariants or IVs. */
879 unsigned int nmasks = i + 1;
880 if (use_masks_p && (nmasks & 1) == 0)
882 rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
883 if (!half_rgc->controls.is_empty ()
884 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
885 continue;
888 if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
889 || !iv_rgc
890 || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
891 != rgc->max_nscalars_per_iter * rgc->factor))
893 /* See whether a zero-based IV would ever generate all-false masks
894 or zero lengths before wrapping around. */
895 bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
897 /* Set up all controls for this group. */
898 test_ctrl
899 = vect_set_loop_controls_directly (loop, loop_vinfo,
900 &preheader_seq, &header_seq,
901 loop_cond_gsi, rgc, niters,
902 niters_skip, might_wrap_p,
903 &iv_step, &compare_step);
905 iv_rgc = rgc;
908 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
909 && rgc->controls.length () > 1)
911 /* vect_set_loop_controls_directly creates an IV whose step
912 is equal to the expected sum of RGC->controls. Use that
913 information to populate RGC->controls. */
914 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
915 gcc_assert (iv_step);
916 vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
920 /* Emit all accumulated statements. */
921 add_preheader_seq (loop, preheader_seq);
922 add_header_seq (loop, header_seq);
924 /* Get a boolean result that tells us whether to iterate. */
925 gcond *cond_stmt;
926 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
927 && !LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
929 gcc_assert (compare_step);
930 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
931 cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
932 NULL_TREE);
934 else
936 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
937 tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
938 cond_stmt
939 = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
941 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
943 /* The loop iterates (NITERS - 1) / VF + 1 times.
944 Subtract one from this to get the latch count. */
945 tree step = build_int_cst (compare_type,
946 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
947 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
948 build_minus_one_cst (compare_type));
949 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
950 niters_minus_one, step);
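  /* E.g. (values assumed) NITERS = 10 and VF = 4 give
     nb_iterations = (10 - 1) / 4 = 2 latch executions, i.e. the three
     vector iterations predicted by (NITERS - 1) / VF + 1.  */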
952 if (final_iv)
954 gassign *assign;
955 /* If vectorizing an inverted early break loop we have to restart the
956 scalar loop at niters - vf. This matches what we do in
957 vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
958 if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
960 tree ftype = TREE_TYPE (orig_niters);
961 tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
962 assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
964 else
965 assign = gimple_build_assign (final_iv, orig_niters);
966 gsi_insert_on_edge_immediate (exit_edge, assign);
969 return cond_stmt;
972 /* Set up the iteration condition and rgroup controls for LOOP in AVX512
973 style, given that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the
974 vectorized loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
975 the number of iterations of the original scalar loop that should be
976 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
977 for vect_set_loop_condition.
979 Insert the branch-back condition before LOOP_COND_GSI and return the
980 final gcond. */
982 static gcond *
983 vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
984 edge exit_edge,
985 loop_vec_info loop_vinfo, tree niters,
986 tree final_iv,
987 bool niters_maybe_zero,
988 gimple_stmt_iterator loop_cond_gsi)
990 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
991 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
992 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
993 tree orig_niters = niters;
994 gimple_seq preheader_seq = NULL;
996 /* Create an IV that counts down from niters and whose step
997 is the number of iterations processed in the current iteration.
998 Produce the controls with compares like the following.
1000 # iv_2 = PHI <niters, iv_3>
1001 rem_4 = MIN <iv_2, VF>;
1002 remv_6 = { rem_4, rem_4, rem_4, ... }
1003 mask_5 = { 0, 0, 1, 1, 2, 2, ... } < remv6;
1004 iv_3 = iv_2 - VF;
1005 if (iv_2 > VF)
1006 continue;
1008 Where the constant is built with elements at most VF - 1 and
1009 repetitions according to max_nscalars_per_iter which is guaranteed
1010 to be the same within a group. */
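  /* Assumed concrete instance: for a mask rgroup with
     max_nscalars_per_iter == 2 over an 8-lane vector the constant built
     below is { 0, 0, 1, 1, 2, 2, 3, 3 }; with two scalar iterations
     remaining, the compare against { 2, 2, ... } activates exactly the
     four lanes belonging to those two iterations.  */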
1012 /* Convert NITERS to the determined IV type. */
1013 if (TYPE_PRECISION (iv_type) > TYPE_PRECISION (TREE_TYPE (niters))
1014 && niters_maybe_zero)
1016 /* We know that there is always at least one iteration, so if the
1017 count is zero then it must have wrapped. Cope with this by
1018 subtracting 1 before the conversion and adding 1 to the result. */
1019 gcc_assert (TYPE_UNSIGNED (TREE_TYPE (niters)));
1020 niters = gimple_build (&preheader_seq, PLUS_EXPR, TREE_TYPE (niters),
1021 niters, build_minus_one_cst (TREE_TYPE (niters)));
1022 niters = gimple_convert (&preheader_seq, iv_type, niters);
1023 niters = gimple_build (&preheader_seq, PLUS_EXPR, iv_type,
1024 niters, build_one_cst (iv_type));
1026 else
1027 niters = gimple_convert (&preheader_seq, iv_type, niters);
1029 /* Bias the initial value of the IV in case we need to skip iterations
1030 at the beginning. */
1031 tree niters_adj = niters;
1032 if (niters_skip)
1034 tree skip = gimple_convert (&preheader_seq, iv_type, niters_skip);
1035 niters_adj = gimple_build (&preheader_seq, PLUS_EXPR,
1036 iv_type, niters, skip);
1039 /* The iteration step is the vectorization factor. */
1040 tree iv_step = build_int_cst (iv_type, vf);
1042 /* Create the decrement IV. */
1043 tree index_before_incr, index_after_incr;
1044 gimple_stmt_iterator incr_gsi;
1045 bool insert_after;
1046 vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
1047 create_iv (niters_adj, MINUS_EXPR, iv_step, NULL_TREE, loop,
1048 &incr_gsi, insert_after, &index_before_incr,
1049 &index_after_incr);
1051 /* Iterate over all the rgroups and fill in their controls. */
1052 for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
1054 if (rgc.controls.is_empty ())
1055 continue;
1057 tree ctrl_type = rgc.type;
1058 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
1060 tree vectype = rgc.compare_type;
1062 /* index_after_incr is the IV specifying the remaining iterations in
1063 the next iteration. */
1064 tree rem = index_after_incr;
1065 /* When the data type for the compare to produce the mask is
1066 smaller than the IV type we need to saturate. Saturate to
1067 the smallest possible value (IV_TYPE) so we only have to
1068 saturate once (CSE will catch redundant ones we add). */
1069 if (TYPE_PRECISION (TREE_TYPE (vectype)) < TYPE_PRECISION (iv_type))
1070 rem = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1071 UNKNOWN_LOCATION,
1072 MIN_EXPR, TREE_TYPE (rem), rem, iv_step);
1073 rem = gimple_convert (&incr_gsi, false, GSI_CONTINUE_LINKING,
1074 UNKNOWN_LOCATION, TREE_TYPE (vectype), rem);
1076 /* Build a data vector composed of the remaining iterations. */
1077 rem = gimple_build_vector_from_val (&incr_gsi, false, GSI_CONTINUE_LINKING,
1078 UNKNOWN_LOCATION, vectype, rem);
1080 /* Provide a definition of each vector in the control group. */
1081 tree next_ctrl = NULL_TREE;
1082 tree first_rem = NULL_TREE;
1083 tree ctrl;
1084 unsigned int i;
1085 FOR_EACH_VEC_ELT_REVERSE (rgc.controls, i, ctrl)
1087 /* Previous controls will cover BIAS items. This control covers the
1088 next batch. */
1089 poly_uint64 bias = nitems_per_ctrl * i;
1091 /* Build the constant to compare the remaining iters against,
1092 this is something like { 0, 0, 1, 1, 2, 2, 3, 3, ... } appropriately
1093 split into pieces. */
1094 unsigned n = TYPE_VECTOR_SUBPARTS (ctrl_type).to_constant ();
1095 tree_vector_builder builder (vectype, n, 1);
1096 for (unsigned i = 0; i < n; ++i)
1098 unsigned HOST_WIDE_INT val
1099 = (i + bias.to_constant ()) / rgc.max_nscalars_per_iter;
1100 gcc_assert (val < vf.to_constant ());
1101 builder.quick_push (build_int_cst (TREE_TYPE (vectype), val));
1103 tree cmp_series = builder.build ();
1105 /* Create the initial control. First include all items that
1106 are within the loop limit. */
1107 tree init_ctrl = NULL_TREE;
1108 poly_uint64 const_limit;
1109 /* See whether the first iteration of the vector loop is known
1110 to have a full control. */
1111 if (poly_int_tree_p (niters, &const_limit)
1112 && known_ge (const_limit, (i + 1) * nitems_per_ctrl))
1113 init_ctrl = build_minus_one_cst (ctrl_type);
1114 else
1116 /* The remaining work items initially are niters. Saturate,
1117 splat and compare. */
1118 if (!first_rem)
1120 first_rem = niters;
1121 if (TYPE_PRECISION (TREE_TYPE (vectype))
1122 < TYPE_PRECISION (iv_type))
1123 first_rem = gimple_build (&preheader_seq,
1124 MIN_EXPR, TREE_TYPE (first_rem),
1125 first_rem, iv_step);
1126 first_rem = gimple_convert (&preheader_seq, TREE_TYPE (vectype),
1127 first_rem);
1128 first_rem = gimple_build_vector_from_val (&preheader_seq,
1129 vectype, first_rem);
1131 init_ctrl = gimple_build (&preheader_seq, LT_EXPR, ctrl_type,
1132 cmp_series, first_rem);
1135 /* Now AND out the bits that are within the number of skipped
1136 items. */
1137 poly_uint64 const_skip;
1138 if (niters_skip
1139 && !(poly_int_tree_p (niters_skip, &const_skip)
1140 && known_le (const_skip, bias)))
1142 /* For integer mode masks it's cheaper to shift out the bits
1143 since that avoids loading a constant. */
1144 gcc_assert (GET_MODE_CLASS (TYPE_MODE (ctrl_type)) == MODE_INT);
1145 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1146 lang_hooks.types.type_for_mode
1147 (TYPE_MODE (ctrl_type), 1),
1148 init_ctrl);
1149 /* ??? But when the shift amount isn't constant this requires
1150 a round-trip to GPRs. We could apply the bias to either
1151 side of the compare instead. */
1152 tree shift = gimple_build (&preheader_seq, MULT_EXPR,
1153 TREE_TYPE (niters_skip), niters_skip,
1154 build_int_cst (TREE_TYPE (niters_skip),
1155 rgc.max_nscalars_per_iter));
1156 init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
1157 TREE_TYPE (init_ctrl),
1158 init_ctrl, shift);
1159 init_ctrl = gimple_build (&preheader_seq, VIEW_CONVERT_EXPR,
1160 ctrl_type, init_ctrl);
1163 /* Get the control value for the next iteration of the loop. */
1164 next_ctrl = gimple_build (&incr_gsi, false, GSI_CONTINUE_LINKING,
1165 UNKNOWN_LOCATION,
1166 LT_EXPR, ctrl_type, cmp_series, rem);
1168 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
1172 /* Emit all accumulated statements. */
1173 add_preheader_seq (loop, preheader_seq);
1175 /* Adjust the exit test using the decrementing IV. */
1176 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
1177 /* When we peel for alignment with niter_skip != 0 this can
1178 cause niter + niter_skip to wrap and since we are comparing the
1179 value before the decrement here we get a false early exit.
1180 We can't compare the value after the decrement either because that
1181 decrement could wrap as well, since we're not doing a saturating
1182 decrement. To avoid this situation we force a larger
1183 iv_type. */
1184 gcond *cond_stmt = gimple_build_cond (code, index_before_incr, iv_step,
1185 NULL_TREE, NULL_TREE);
1186 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1188 /* The loop iterates (NITERS - 1 + NITERS_SKIP) / VF + 1 times.
1189 Subtract one from this to get the latch count. */
1190 tree niters_minus_one
1191 = fold_build2 (PLUS_EXPR, TREE_TYPE (orig_niters), orig_niters,
1192 build_minus_one_cst (TREE_TYPE (orig_niters)));
1193 tree niters_adj2 = fold_convert (iv_type, niters_minus_one);
1194 if (niters_skip)
1195 niters_adj2 = fold_build2 (PLUS_EXPR, iv_type, niters_minus_one,
1196 fold_convert (iv_type, niters_skip));
1197 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, iv_type,
1198 niters_adj2, iv_step);
1200 if (final_iv)
1202 gassign *assign;
1203 /* If vectorizing an inverted early break loop we have to restart the
1204 scalar loop at niters - vf. This matches what we do in
1205 vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
1206 if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
1208 tree ftype = TREE_TYPE (orig_niters);
1209 tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1210 assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
1212 else
1213 assign = gimple_build_assign (final_iv, orig_niters);
1214 gsi_insert_on_edge_immediate (exit_edge, assign);
1217 return cond_stmt;
1221 /* Like vect_set_loop_condition, but handle the case in which the vector
1222 loop handles exactly VF scalars per iteration. */
1224 static gcond *
1225 vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge exit_edge,
1226 class loop *loop, tree niters, tree step,
1227 tree final_iv, bool niters_maybe_zero,
1228 gimple_stmt_iterator loop_cond_gsi)
1230 tree indx_before_incr, indx_after_incr;
1231 gcond *cond_stmt;
1232 gcond *orig_cond;
1233 edge pe = loop_preheader_edge (loop);
1234 gimple_stmt_iterator incr_gsi;
1235 bool insert_after;
1236 enum tree_code code;
1237 tree niters_type = TREE_TYPE (niters);
1239 orig_cond = get_loop_exit_condition (exit_edge);
1240 gcc_assert (orig_cond);
1241 loop_cond_gsi = gsi_for_stmt (orig_cond);
1243 tree init, limit;
1244 if (!niters_maybe_zero && integer_onep (step))
1246 /* In this case we can use a simple 0-based IV:
1249 x = 0;
1253 x += 1;
1255 while (x < NITERS); */
1256 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1257 init = build_zero_cst (niters_type);
1258 limit = niters;
1260 else
1262 /* The following works for all values of NITERS except 0:
1265 x = 0;
1269 x += STEP;
1271 while (x <= NITERS - STEP);
1273 so that the loop continues to iterate if x + STEP - 1 < NITERS
1274 but stops if x + STEP - 1 >= NITERS.
1276 However, if NITERS is zero, x never hits a value above NITERS - STEP
1277 before wrapping around. There are two obvious ways of dealing with
1278 this:
1280 - start at STEP - 1 and compare x before incrementing it
1281 - start at -1 and compare x after incrementing it
1283 The latter is simpler and is what we use. The loop in this case
1284 looks like:
1287 x = -1;
1291 x += STEP;
1293 while (x < NITERS - STEP);
1295 In both cases the loop limit is NITERS - STEP. */
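     /* Worked instance of case B (values assumed): NITERS = 8, STEP = 4.
	The limit is NITERS - STEP = 4, the incremented IV takes the values
	4 and 8, so the loop latches once and executes
	(8 - 4) / 4 + 1 = 2 times.  */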
1296 gimple_seq seq = NULL;
1297 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
1298 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
1299 if (seq)
1301 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1302 gcc_assert (!new_bb);
1304 if (niters_maybe_zero)
1306 /* Case C. */
1307 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1308 init = build_all_ones_cst (niters_type);
1310 else
1312 /* Case B. */
1313 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
1314 init = build_zero_cst (niters_type);
1318 vect_iv_increment_position (exit_edge, &incr_gsi, &insert_after);
1319 create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
1320 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
1321 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
1322 true, NULL_TREE, true,
1323 GSI_SAME_STMT);
1324 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
1325 true, GSI_SAME_STMT);
1327 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
1328 NULL_TREE);
1330 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1332 /* Record the number of latch iterations. */
1333 if (limit == niters)
1334 /* Case A: the loop iterates NITERS times. Subtract one to get the
1335 latch count. */
1336 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
1337 build_int_cst (niters_type, 1));
1338 else
1339 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
1340 Subtract one from this to get the latch count. */
1341 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
1342 limit, step);
1344 if (final_iv)
1346 gassign *assign;
1347 gcc_assert (single_pred_p (exit_edge->dest));
1348 tree phi_dest
1349 = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
1350 /* Make sure to maintain LC SSA form here and elide the subtraction
1351 if the value is zero. */
1352 gphi *phi = create_phi_node (phi_dest, exit_edge->dest);
1353 add_phi_arg (phi, indx_after_incr, exit_edge, UNKNOWN_LOCATION);
1354 if (!integer_zerop (init))
1356 assign = gimple_build_assign (final_iv, MINUS_EXPR,
1357 phi_dest, init);
1358 gimple_stmt_iterator gsi = gsi_after_labels (exit_edge->dest);
1359 gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
1363 return cond_stmt;
1366 /* If we're using fully-masked loops, make LOOP iterate:
1368 N == (NITERS - 1) / STEP + 1
1370 times. When NITERS is zero, this is equivalent to making the loop
1371 execute (1 << M) / STEP times, where M is the precision of NITERS.
1372 NITERS_MAYBE_ZERO is true if this last case might occur.
1374 If we're not using fully-masked loops, make LOOP iterate:
1376 N == (NITERS - STEP) / STEP + 1
1378 times, where NITERS is known to be outside the range [1, STEP - 1].
1379 This is equivalent to making the loop execute NITERS / STEP times
1380 when NITERS is nonzero and (1 << M) / STEP times otherwise.
1381 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
1383 If FINAL_IV is nonnull, it is an SSA name that should be set to
1384 N * STEP on exit from the loop.
1386 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
1388 void
1389 vect_set_loop_condition (class loop *loop, edge loop_e, loop_vec_info loop_vinfo,
1390 tree niters, tree step, tree final_iv,
1391 bool niters_maybe_zero)
1393 gcond *cond_stmt;
1394 gcond *orig_cond = get_loop_exit_condition (loop_e);
1395 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
1397 if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1399 if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_avx512)
1400 cond_stmt = vect_set_loop_condition_partial_vectors_avx512 (loop, loop_e,
1401 loop_vinfo,
1402 niters, final_iv,
1403 niters_maybe_zero,
1404 loop_cond_gsi);
1405 else
1406 cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_e,
1407 loop_vinfo,
1408 niters, final_iv,
1409 niters_maybe_zero,
1410 loop_cond_gsi);
1412 else
1413 cond_stmt = vect_set_loop_condition_normal (loop_vinfo, loop_e, loop,
1414 niters,
1415 step, final_iv,
1416 niters_maybe_zero,
1417 loop_cond_gsi);
1419 /* Remove old loop exit test. */
1420 stmt_vec_info orig_cond_info;
1421 if (loop_vinfo
1422 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
1423 loop_vinfo->remove_stmt (orig_cond_info);
1424 else
1425 gsi_remove (&loop_cond_gsi, true);
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
1429 (gimple *) cond_stmt);
1432 /* Get the virtual operand live on E. The precondition on this is valid
1433 immediate dominators and an actual virtual definition dominating E. */
1434 /* ??? Costly band-aid. For the use in question we can populate a
1435 live-on-exit/end-of-BB virtual operand when copying stmts. */
1437 static tree
1438 get_live_virtual_operand_on_edge (edge e)
1440 basic_block bb = e->src;
1443 for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1445 gimple *stmt = gsi_stmt (gsi);
1446 if (gimple_vdef (stmt))
1447 return gimple_vdef (stmt);
1448 if (gimple_vuse (stmt))
1449 return gimple_vuse (stmt);
1451 if (gphi *vphi = get_virtual_phi (bb))
1452 return gimple_phi_result (vphi);
1453 bb = get_immediate_dominator (CDI_DOMINATORS, bb);
1455 while (1);
1458 /* Given LOOP this function generates a new copy of it and puts it
1459 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1460 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1461 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1462 entry or exit of LOOP. If FLOW_LOOPS then connect LOOP to SCALAR_LOOP as a
1463 continuation. This is correct for cases where one loop continues from the
1464 other like in the vectorizer, but not true for uses in e.g. loop distribution
1465 where the contents of the loop body are split but the iteration space of both
1466 copies remains the same.
1468 If UPDATED_DOMS is not NULL it is updated with the list of basic blocks whose
1469 dominators were updated during the peeling. When doing early break vectorization
1470 LOOP_VINFO needs to be provided and is used to keep track of any newly created
1471 memory references that need to be updated should we decide to vectorize. */
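/* Conceptual sketch (not part of the original comment): with E being the
   loop exit the result is

     preheader -> LOOP -> new_preheader -> LOOP_COPY -> exit_dest

   i.e. an epilogue copy, whereas passing the preheader edge instead gives

     preheader -> LOOP_COPY -> LOOP -> exit_dest

   i.e. a prologue copy.  */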
1473 class loop *
1474 slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
1475 class loop *scalar_loop,
1476 edge scalar_exit, edge e, edge *new_e,
1477 bool flow_loops,
1478 vec<basic_block> *updated_doms)
1480 class loop *new_loop;
1481 basic_block *new_bbs, *bbs, *pbbs;
1482 bool at_exit;
1483 bool was_imm_dom;
1484 basic_block exit_dest;
1485 edge exit, new_exit;
1486 bool duplicate_outer_loop = false;
1488 exit = loop_exit;
1489 at_exit = (e == exit);
1490 if (!at_exit && e != loop_preheader_edge (loop))
1491 return NULL;
1493 if (scalar_loop == NULL)
1495 scalar_loop = loop;
1496 scalar_exit = loop_exit;
1498 else if (scalar_loop == loop)
1499 scalar_exit = loop_exit;
1500 else
1502 /* Loop has been versioned; match the exits up using the aux index. */
1503 for (edge exit : get_loop_exit_edges (scalar_loop))
1504 if (exit->aux == loop_exit->aux)
1506 scalar_exit = exit;
1507 break;
1510 gcc_assert (scalar_exit);
1513 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1514 pbbs = bbs + 1;
1515 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1516 /* Allow duplication of outer loops. */
1517 if (scalar_loop->inner)
1518 duplicate_outer_loop = true;
1520 /* Generate new loop structure. */
1521 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1522 duplicate_subloops (scalar_loop, new_loop);
1524 exit_dest = exit->dest;
1525 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1526 exit_dest) == exit->src ?
1527 true : false);
1529 /* Also copy the pre-header, this avoids jumping through hoops to
1530 duplicate the loop entry PHI arguments. Create an empty
1531 pre-header unconditionally for this. */
1532 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1533 edge entry_e = single_pred_edge (preheader);
1534 bbs[0] = preheader;
1535 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1537 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1538 &scalar_exit, 1, &new_exit, NULL,
1539 at_exit ? loop->latch : e->src, true);
1540 exit = loop_exit;
1541 basic_block new_preheader = new_bbs[0];
1543 gcc_assert (new_exit);
1545 /* Record the new loop exit information. new_loop doesn't have SCEV data and
1546 so we must initialize the exit information. */
1547 if (new_e)
1548 *new_e = new_exit;
1550 /* Before installing PHI arguments make sure that the edges
1551 into them match that of the scalar loop we analyzed. This
1552 makes sure the SLP tree matches up between the main vectorized
1553 loop and the epilogue vectorized copies. */
1554 if (single_succ_edge (preheader)->dest_idx
1555 != single_succ_edge (new_bbs[0])->dest_idx)
1557 basic_block swap_bb = new_bbs[1];
1558 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1559 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1560 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1561 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1563 if (duplicate_outer_loop)
1565 class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
1566 if (loop_preheader_edge (scalar_loop)->dest_idx
1567 != loop_preheader_edge (new_inner_loop)->dest_idx)
1569 basic_block swap_bb = new_inner_loop->header;
1570 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1571 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1572 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1573 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1577 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1579 /* Skip the new preheader since it's deleted if the loop copy is added at entry. */
1580 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1581 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1583 /* Rename the exit uses. */
1584 for (edge exit : get_loop_exit_edges (new_loop))
1585 for (auto gsi = gsi_start_phis (exit->dest);
1586 !gsi_end_p (gsi); gsi_next (&gsi))
1588 tree orig_def = PHI_ARG_DEF_FROM_EDGE (gsi.phi (), exit);
1589 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), exit));
1590 if (MAY_HAVE_DEBUG_BIND_STMTS)
1591 adjust_debug_stmts (orig_def, PHI_RESULT (gsi.phi ()), exit->dest);
1594 auto loop_exits = get_loop_exit_edges (loop);
1595 bool multiple_exits_p = loop_exits.length () > 1;
1596 auto_vec<basic_block> doms;
1598 if (at_exit) /* Add the loop copy at exit. */
1600 if (scalar_loop != loop && new_exit->dest != exit_dest)
1602 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1603 flush_pending_stmts (new_exit);
1606 bool need_virtual_phi = get_virtual_phi (loop->header);
1608 /* For the main loop exit preserve the LC PHI nodes. For vectorization
1609 we need them to continue or finalize reductions. Since we do not
1610 copy the loop exit blocks we have to materialize PHIs at the
1611 new destination before redirecting edges. */
1612 for (auto gsi_from = gsi_start_phis (loop_exit->dest);
1613 !gsi_end_p (gsi_from); gsi_next (&gsi_from))
1615 tree res = gimple_phi_result (*gsi_from);
1616 create_phi_node (copy_ssa_name (res), new_preheader);
1618 edge e = redirect_edge_and_branch (loop_exit, new_preheader);
1619 gcc_assert (e == loop_exit);
1620 flush_pending_stmts (loop_exit);
1621 set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop_exit->src);
1623 /* If we ended up choosing an exit leading to a path not using memory
1624 we can end up without a virtual LC PHI. Create it when it is
1625 needed because of the epilog loop continuation. */
1626 if (need_virtual_phi && !get_virtual_phi (loop_exit->dest))
1628 tree header_def = gimple_phi_result (get_virtual_phi (loop->header));
1629 gphi *vphi = create_phi_node (copy_ssa_name (header_def),
1630 new_preheader);
1631 add_phi_arg (vphi, get_live_virtual_operand_on_edge (loop_exit),
1632 loop_exit, UNKNOWN_LOCATION);
1635 bool multiple_exits_p = loop_exits.length () > 1;
1636 basic_block main_loop_exit_block = new_preheader;
1637 basic_block alt_loop_exit_block = NULL;
1638 /* Create the CFG for multiple exits.
1639 | loop_exit | alt1 | altN
1640 v v ... v
1641 main_loop_exit_block: alt_loop_exit_block:
1644 new_preheader:
1645 where in the new preheader we need merge PHIs for
1646 the continuation values into the epilogue header.
1647 Do not bother with exit PHIs for the early exits, except for
1648 their live virtual operand. We'll fix things up below. */
1649 if (multiple_exits_p)
1651 edge loop_e = single_succ_edge (new_preheader);
1652 new_preheader = split_edge (loop_e);
1654 gphi *vphi = NULL;
1655 alt_loop_exit_block = new_preheader;
1656 for (auto exit : loop_exits)
1657 if (exit != loop_exit)
1659 tree vphi_def = NULL_TREE;
1660 if (gphi *evphi = get_virtual_phi (exit->dest))
1661 vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
1662 edge res = redirect_edge_and_branch (exit, alt_loop_exit_block);
1663 gcc_assert (res == exit);
1664 redirect_edge_var_map_clear (exit);
1665 if (alt_loop_exit_block == new_preheader)
1666 alt_loop_exit_block = split_edge (exit);
1667 if (!need_virtual_phi)
1668 continue;
1669 /* When the edge has no virtual LC PHI get at the live
1670 virtual operand by other means. */
1671 if (!vphi_def)
1672 vphi_def = get_live_virtual_operand_on_edge (exit);
1673 if (!vphi)
1674 vphi = create_phi_node (copy_ssa_name (vphi_def),
1675 alt_loop_exit_block);
1676 else
1677 /* Edge redirection might re-allocate the PHI node
1678 so we have to rediscover it. */
1679 vphi = get_virtual_phi (alt_loop_exit_block);
1680 add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
1683 set_immediate_dominator (CDI_DOMINATORS, new_preheader,
1684 loop->header);
1687 /* Adjust the epilog loop PHI entry values to continue iteration.
1688 This adds remaining necessary LC PHI nodes to the main exit
1689 and creates merge PHIs when we have multiple exits with
1690 their appropriate continuation. */
1691 if (flow_loops)
1693 edge loop_entry = single_succ_edge (new_preheader);
1694 bool peeled_iters = single_pred (loop->latch) != loop_exit->src;
1696 /* Record the new SSA names in the cache so that we can skip
1697 materializing them again when we fill in the rest of the LC SSA
1698 variables. */
1699 hash_map <tree, tree> new_phi_args;
1700 for (auto psi = gsi_start_phis (main_loop_exit_block);
1701 !gsi_end_p (psi); gsi_next (&psi))
1703 gphi *phi = *psi;
1704 tree new_arg = gimple_phi_arg_def_from_edge (phi, loop_exit);
1705 if (TREE_CODE (new_arg) != SSA_NAME)
1706 continue;
1708 /* If the loop doesn't have a virtual def then only possibly keep
1709 the epilog LC PHI for it and avoid creating new defs. */
1710 if (virtual_operand_p (new_arg) && !need_virtual_phi)
1712 auto gsi = gsi_for_stmt (phi);
1713 remove_phi_node (&gsi, true);
1714 continue;
1717 /* If we decided not to remove the PHI node we should also not
1718 rematerialize it later on. */
1719 new_phi_args.put (new_arg, gimple_phi_result (phi));
1722 /* Create the merge PHI nodes in new_preheader and populate the
1723 arguments for the exits. */
1724 if (multiple_exits_p)
1726 for (auto gsi_from = gsi_start_phis (loop->header),
1727 gsi_to = gsi_start_phis (new_loop->header);
1728 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1729 gsi_next (&gsi_from), gsi_next (&gsi_to))
1731 gimple *from_phi = gsi_stmt (gsi_from);
1732 gimple *to_phi = gsi_stmt (gsi_to);
1734 /* When the vector loop is peeled then we need to use the
1735 value at start of the loop, otherwise the main loop exit
1736 should use the final iter value. */
1737 tree new_arg;
1738 if (peeled_iters)
1739 new_arg = gimple_phi_result (from_phi);
1740 else
1741 new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1742 loop_latch_edge (loop));
1744 /* Check if we've already created a new phi node during edge
1745 redirection and re-use it if so. Otherwise create a
1746 LC PHI node to feed the merge PHI. */
1747 tree *res;
1748 if (virtual_operand_p (new_arg))
1750 /* Use the existing virtual LC SSA from exit block. */
1751 gphi *vphi = get_virtual_phi (main_loop_exit_block);
1752 new_arg = gimple_phi_result (vphi);
1754 else if ((res = new_phi_args.get (new_arg)))
1755 new_arg = *res;
1756 else
1758 /* Create the LC PHI node for the exit. */
1759 tree new_def = copy_ssa_name (new_arg);
1760 gphi *lc_phi
1761 = create_phi_node (new_def, main_loop_exit_block);
1762 SET_PHI_ARG_DEF (lc_phi, 0, new_arg);
1763 new_arg = new_def;
1766 /* Create the PHI node in the merge block merging the
1767 main and early exit values. */
1768 tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1769 gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
1770 edge main_e = single_succ_edge (main_loop_exit_block);
1771 SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, main_e, new_arg);
1773 /* And adjust the epilog entry value. */
1774 adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1777 /* After creating the merge PHIs handle the early exits; those
1778 should use the values at the start of the loop. */
1779 for (auto gsi_from = gsi_start_phis (loop->header),
1780 gsi_to = gsi_start_phis (new_preheader);
1781 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1782 gsi_next (&gsi_from), gsi_next (&gsi_to))
1784 gimple *from_phi = gsi_stmt (gsi_from);
1785 gimple *to_phi = gsi_stmt (gsi_to);
1787 /* Now update the virtual PHI nodes with the right value. */
1788 tree alt_arg = gimple_phi_result (from_phi);
1789 if (virtual_operand_p (alt_arg))
1791 gphi *vphi = get_virtual_phi (alt_loop_exit_block);
1792 alt_arg = gimple_phi_result (vphi);
1794 /* For other live args we didn't create LC PHI nodes.
1795 Do so here. */
1796 else
1798 tree alt_def = copy_ssa_name (alt_arg);
1799 gphi *lc_phi
1800 = create_phi_node (alt_def, alt_loop_exit_block);
1801 for (unsigned i = 0; i < gimple_phi_num_args (lc_phi);
1802 ++i)
1803 SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
1804 alt_arg = alt_def;
1806 edge alt_e = single_succ_edge (alt_loop_exit_block);
1807 SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
1810 /* For the single exit case only create the missing LC PHI nodes
1811 for the continuation of the loop IVs that are not also already
1812 reductions and thus had LC PHI nodes on the exit already. */
1813 else
1815 for (auto gsi_from = gsi_start_phis (loop->header),
1816 gsi_to = gsi_start_phis (new_loop->header);
1817 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1818 gsi_next (&gsi_from), gsi_next (&gsi_to))
1820 gimple *from_phi = gsi_stmt (gsi_from);
1821 gimple *to_phi = gsi_stmt (gsi_to);
1822 tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1823 loop_latch_edge (loop));
1825 /* Check if we've already created a new phi node during edge
1826 redirection. If we have, only propagate the value
1827 downwards. */
1828 if (tree *res = new_phi_args.get (new_arg))
1830 adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
1831 continue;
1834 tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
1835 gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
1836 SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg);
1837 adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
1842 if (was_imm_dom || duplicate_outer_loop)
1843 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1845 /* And remove the unnecessary forwarder again. Keep the other
1846 one so we have a proper pre-header for the loop at the exit edge. */
1847 redirect_edge_pred (single_succ_edge (preheader),
1848 single_pred (preheader));
1849 delete_basic_block (preheader);
1850 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1851 loop_preheader_edge (scalar_loop)->src);
1853 /* Finally after wiring the new epilogue we need to update its main exit
1854 to the original function exit we recorded. Other exits are already
1855 correct. */
1856 if (multiple_exits_p)
1858 class loop *update_loop = new_loop;
1859 doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
1860 for (unsigned i = 0; i < doms.length (); ++i)
1861 if (flow_bb_inside_loop_p (loop, doms[i]))
1862 doms.unordered_remove (i);
1864 for (edge e : get_loop_exit_edges (update_loop))
1866 edge ex;
1867 edge_iterator ei;
1868 FOR_EACH_EDGE (ex, ei, e->dest->succs)
1870 /* Find the first non-fallthrough block as fall-throughs can't
1871 dominate other blocks. */
1872 if (single_succ_p (ex->dest))
1874 doms.safe_push (ex->dest);
1875 ex = single_succ_edge (ex->dest);
1877 doms.safe_push (ex->dest);
1879 doms.safe_push (e->dest);
1882 iterate_fix_dominators (CDI_DOMINATORS, doms, false);
1883 if (updated_doms)
1884 updated_doms->safe_splice (doms);
1887 else /* Add the copy at entry. */
1889 /* Copy the current loop LC PHI nodes between the original loop exit
1890 block and the new loop header. This allows us to later split the
1891 preheader block and still find the right LC nodes. */
1892 if (flow_loops)
1893 for (auto gsi_from = gsi_start_phis (new_loop->header),
1894 gsi_to = gsi_start_phis (loop->header);
1895 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
1896 gsi_next (&gsi_from), gsi_next (&gsi_to))
1898 gimple *from_phi = gsi_stmt (gsi_from);
1899 gimple *to_phi = gsi_stmt (gsi_to);
1900 tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
1901 loop_latch_edge (new_loop));
1902 adjust_phi_and_debug_stmts (to_phi, loop_preheader_edge (loop),
1903 new_arg);
1906 if (scalar_loop != loop)
1908 /* Remove the unnecessary forwarder of scalar_loop again. */
1909 redirect_edge_pred (single_succ_edge (preheader),
1910 single_pred (preheader));
1911 delete_basic_block (preheader);
1912 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1913 loop_preheader_edge (scalar_loop)->src);
1914 preheader = split_edge (loop_preheader_edge (loop));
1915 entry_e = single_pred_edge (preheader);
1918 redirect_edge_and_branch_force (entry_e, new_preheader);
1919 flush_pending_stmts (entry_e);
1920 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1922 redirect_edge_and_branch_force (new_exit, preheader);
1923 flush_pending_stmts (new_exit);
1924 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1926 /* And remove the unnecessary forwarder again. Keep the other
1927 one so we have a proper pre-header for the loop at the exit edge. */
1928 redirect_edge_pred (single_succ_edge (new_preheader),
1929 single_pred (new_preheader));
1930 delete_basic_block (new_preheader);
1931 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1932 loop_preheader_edge (new_loop)->src);
1934 /* Update dominators for multiple exits. */
1935 if (multiple_exits_p)
1937 for (edge alt_e : loop_exits)
1939 if (alt_e == loop_exit)
1940 continue;
1941 basic_block old_dom
1942 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
1943 if (flow_bb_inside_loop_p (loop, old_dom))
1945 auto_vec<basic_block, 8> queue;
1946 for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
1947 son; son = next_dom_son (CDI_DOMINATORS, son))
1948 if (!flow_bb_inside_loop_p (loop, son))
1949 queue.safe_push (son);
1950 for (auto son : queue)
1951 set_immediate_dominator (CDI_DOMINATORS,
1952 son, get_bb_copy (old_dom));
1958 free (new_bbs);
1959 free (bbs);
1961 checking_verify_dominators (CDI_DOMINATORS);
1963 return new_loop;
1967 /* Given the condition expression COND, put it as the last statement of
1968 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1969 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1970 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1971 new edge as irreducible if IRREDUCIBLE_P is true. */
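/* Roughly (a sketch of the resulting CFG, not a verbatim dump), assuming the
single successor edge of GUARD_BB is the fall-through into the loop:

GUARD_BB:
if (COND) goto GUARD_TO; <- new skip edge, probability PROBABILITY
else fall through on the old successor edge <- probability PROBABILITY.invert ()

with GUARD_TO additionally made immediately dominated by DOM_BB. */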
1973 static edge
1974 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1975 basic_block guard_to, basic_block dom_bb,
1976 profile_probability probability, bool irreducible_p)
1978 gimple_stmt_iterator gsi;
1979 edge new_e, enter_e;
1980 gcond *cond_stmt;
1981 gimple_seq gimplify_stmt_list = NULL;
1983 enter_e = EDGE_SUCC (guard_bb, 0);
1984 enter_e->flags &= ~EDGE_FALLTHRU;
1985 enter_e->flags |= EDGE_FALSE_VALUE;
1986 gsi = gsi_last_bb (guard_bb);
1988 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
1989 is_gimple_condexpr_for_cond, NULL_TREE);
1990 if (gimplify_stmt_list)
1991 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1993 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
1994 gsi = gsi_last_bb (guard_bb);
1995 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1997 /* Add new edge to connect guard block to the merge/loop-exit block. */
1998 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
2000 new_e->probability = probability;
2001 if (irreducible_p)
2002 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
2004 enter_e->probability = probability.invert ();
2005 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
2007 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
2008 if (enter_e->dest->loop_father->header == enter_e->dest)
2009 split_edge (enter_e);
2011 return new_e;
2015 /* This function verifies that the following restrictions apply to LOOP:
2016 (1) it consists of exactly 2 basic blocks - a header and an empty latch -
2017 for an innermost loop, and of 5 basic blocks for an outer loop.
2018 (2) it is single entry, single exit
2019 (3) its exit condition is the last stmt in the header
2020 (4) E is the entry/exit edge of LOOP.
2023 bool
2024 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge exit_e,
2025 const_edge e)
2027 edge entry_e = loop_preheader_edge (loop);
2028 gcond *orig_cond = get_loop_exit_condition (exit_e);
2029 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
2031 /* All loops have an outer scope; the only case loop->outer is NULL is for
2032 the function itself. */
2033 if (!loop_outer (loop)
2034 || !empty_block_p (loop->latch)
2035 || !exit_e
2036 /* Verify that new loop exit condition can be trivially modified. */
2037 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
2038 || (e != exit_e && e != entry_e))
2039 return false;
2041 basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
2042 get_loop_body_with_size (loop, bbs, loop->num_nodes);
2043 bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
2044 free (bbs);
2045 return ret;
2048 /* Function find_loop_location.
2050 Extract the location of the loop in the source code.
2051 If the loop is not well formed for vectorization, an estimated
2052 location is calculated.
2053 Return the loop location if found and an unknown location otherwise. */
2055 dump_user_location_t
2056 find_loop_location (class loop *loop)
2058 gimple *stmt = NULL;
2059 basic_block bb;
2060 gimple_stmt_iterator si;
2062 if (!loop)
2063 return dump_user_location_t ();
2065 /* For the root of the loop tree return the function location. */
2066 if (!loop_outer (loop))
2067 return dump_user_location_t::from_function_decl (cfun->decl);
2069 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
2071 /* We only care about the loop location, so use any exit with location
2072 information. */
2073 for (edge e : get_loop_exit_edges (loop))
2075 stmt = get_loop_exit_condition (e);
2077 if (stmt
2078 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
2079 return stmt;
2083 /* If we got here the loop is probably not "well formed",
2084 try to estimate the loop location. */
2086 if (!loop->header)
2087 return dump_user_location_t ();
2089 bb = loop->header;
2091 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2093 stmt = gsi_stmt (si);
2094 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
2095 return stmt;
2098 return dump_user_location_t ();
2101 /* Return true if the phi described by STMT_INFO defines an IV of the
2102 loop to be vectorized. */
2104 static bool
2105 iv_phi_p (stmt_vec_info stmt_info)
2107 gphi *phi = as_a <gphi *> (stmt_info->stmt);
2108 if (virtual_operand_p (PHI_RESULT (phi)))
2109 return false;
2111 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
2112 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
2113 return false;
2115 return true;
2118 /* Return true if vectorizer can peel for nonlinear iv. */
2119 static bool
2120 vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
2121 stmt_vec_info stmt_info)
2123 enum vect_induction_op_type induction_type
2124 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
2125 tree niters_skip;
2126 /* Init_expr will be updated by vect_update_ivs_after_vectorizer.
2127 If niters or vf is unknown:
2128 For shift, when the shift amount >= precision, the behavior is undefined.
2129 For mult, we don't know how to generate
2130 init_expr * pow (step, niters) for variable niters.
2131 For neg, unknown niters is fine, since the niters of the vectorized
2132 main loop will always be a multiple of 2.
2133 See also PR113163, PR114196 and PR114485. */
2134 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
2135 || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
2136 || (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2137 && induction_type != vect_step_op_neg))
2139 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141 "Peeling for epilogue is not supported"
2142 " for this nonlinear induction"
2143 " when iteration count is unknown or"
2144 " when using partial vectorization.\n");
2145 return false;
2148 /* Avoid compile time hog on vect_peel_nonlinear_iv_init. */
2149 if (induction_type == vect_step_op_mul)
2151 tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
2152 tree type = TREE_TYPE (step_expr);
2154 if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
2155 && LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2159 "Avoid compile time hog on"
2160 " vect_peel_nonlinear_iv_init"
2161 " for nonlinear induction vec_step_op_mul"
2162 " when iteration count is too big.\n");
2163 return false;
2167 /* Also doesn't support peeling for neg when niter is variable.
2168 ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
2169 niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
2170 if ((niters_skip != NULL_TREE
2171 && (TREE_CODE (niters_skip) != INTEGER_CST
2172 || (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
2173 || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
2174 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "Peeling for alignement is not supported"
2179 " for nonlinear induction when niters_skip"
2180 " is not constant.\n");
2181 return false;
2184 return true;
2187 /* Function vect_can_advance_ivs_p
2189 In case the number of iterations that LOOP iterates is unknown at compile
2190 time, an epilog loop will be generated, and the loop induction variables
2191 (IVs) will be "advanced" to the value they are supposed to take just before
2192 the epilog loop. Here we check that the access function of the loop IVs
2193 and the expression that represents the loop bound are simple enough.
2194 These restrictions will be relaxed in the future. */
2196 bool
2197 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2199 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2200 basic_block bb = loop->header;
2201 gphi_iterator gsi;
2203 /* Analyze phi functions of the loop header. */
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
2207 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2209 tree evolution_part;
2210 enum vect_induction_op_type induction_type;
2212 gphi *phi = gsi.phi ();
2213 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
2216 phi_info->stmt);
2218 /* Skip virtual phi's. The data dependences that are associated with
2219 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
2221 Skip reduction phis. */
2222 if (!iv_phi_p (phi_info))
2224 if (dump_enabled_p ())
2225 dump_printf_loc (MSG_NOTE, vect_location,
2226 "reduc or virtual phi. skip.\n");
2227 continue;
2230 induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2231 if (induction_type != vect_step_op_add)
2233 if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
2234 return false;
2236 continue;
2239 /* Analyze the evolution function. */
2241 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2242 if (evolution_part == NULL_TREE)
2244 if (dump_enabled_p ())
2245 dump_printf (MSG_MISSED_OPTIMIZATION,
2246 "No access function or evolution.\n");
2247 return false;
2250 /* FORNOW: We do not transform initial conditions of IVs
2251 which evolution functions are not invariants in the loop. */
2253 if (!expr_invariant_in_loop_p (loop, evolution_part))
2255 if (dump_enabled_p ())
2256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2257 "evolution not invariant in loop.\n");
2258 return false;
2261 /* FORNOW: We do not transform initial conditions of IVs
2262 which evolution functions are a polynomial of degree >= 2. */
2264 if (tree_is_chrec (evolution_part))
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2268 "evolution is chrec.\n");
2269 return false;
2273 return true;
2277 /* Function vect_update_ivs_after_vectorizer.
2279 "Advance" the induction variables of LOOP to the value they should take
2280 after the execution of LOOP. This is currently necessary because the
2281 vectorizer does not handle induction variables that are used after the
2282 loop. Such a situation occurs when the last iterations of LOOP are
2283 peeled, because:
2284 1. We introduced new uses after LOOP for IVs that were not originally used
2285 after LOOP: the IVs of LOOP are now used by an epilog loop.
2286 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2287 times, whereas the loop IVs should be bumped N times.
2289 Input:
2290 - LOOP - a loop that is going to be vectorized. The last few iterations
2291 of LOOP were peeled.
2292 - NITERS - the number of iterations that LOOP executes (before it is
2293 vectorized). i.e, the number of times the ivs should be bumped.
2294 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2295 coming out from LOOP on which there are uses of the LOOP ivs
2296 (this is the path from LOOP->exit to epilog_loop->preheader).
2298 The new definitions of the ivs are placed in LOOP->exit.
2299 The phi args associated with the edge UPDATE_E in the bb
2300 UPDATE_E->dest are updated accordingly.
2302 Assumption 1: Like the rest of the vectorizer, this function assumes
2303 a single loop exit that has a single predecessor.
2305 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2306 organized in the same order.
2308 Assumption 3: The access function of the ivs is simple enough (see
2309 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2311 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2312 coming out of LOOP on which the ivs of LOOP are used (this is the path
2313 that leads to the epilog loop; other paths skip the epilog loop). This
2314 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2315 needs to have its phis updated.
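/* A purely illustrative example (the numbers are made up): for an IV with
init_expr 0 and step_expr 4 that should be bumped NITERS = 13 times, the
code below emits ni = 0 + 13 * 4 = 52 in the exit block and rewrites the
PHI argument on UPDATE_E so the epilog loop resumes from 52. */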
2318 static void
2319 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
2320 tree niters, edge update_e)
2322 gphi_iterator gsi, gsi1;
2323 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2324 basic_block update_bb = update_e->dest;
2325 basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
2326 gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
2328 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
2329 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
2330 gsi_next (&gsi), gsi_next (&gsi1))
2332 tree init_expr;
2333 tree step_expr, off;
2334 tree type;
2335 tree var, ni, ni_name;
2337 gphi *phi = gsi.phi ();
2338 gphi *phi1 = gsi1.phi ();
2339 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location,
2342 "vect_update_ivs_after_vectorizer: phi: %G",
2343 (gimple *) phi);
2345 /* Skip reduction and virtual phis. */
2346 if (!iv_phi_p (phi_info))
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "reduc or virtual phi. skip.\n");
2351 continue;
2354 type = TREE_TYPE (gimple_phi_result (phi));
2355 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
2356 step_expr = unshare_expr (step_expr);
2358 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2359 of degree >= 2 or exponential. */
2360 gcc_assert (!tree_is_chrec (step_expr));
2362 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2363 gimple_seq stmts = NULL;
2364 enum vect_induction_op_type induction_type
2365 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
2367 if (induction_type == vect_step_op_add)
2369 tree stype = TREE_TYPE (step_expr);
2370 off = fold_build2 (MULT_EXPR, stype,
2371 fold_convert (stype, niters), step_expr);
2373 if (POINTER_TYPE_P (type))
2374 ni = fold_build_pointer_plus (init_expr, off);
2375 else
2376 ni = fold_convert (type,
2377 fold_build2 (PLUS_EXPR, stype,
2378 fold_convert (stype, init_expr),
2379 off));
2381 /* Don't bother call vect_peel_nonlinear_iv_init. */
2382 else if (induction_type == vect_step_op_neg)
2383 ni = init_expr;
2384 else
2385 ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
2386 niters, step_expr,
2387 induction_type);
2389 var = create_tmp_var (type, "tmp");
2391 gimple_seq new_stmts = NULL;
2392 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
2394 /* Exit_bb shouldn't be empty. */
2395 if (!gsi_end_p (last_gsi))
2397 gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
2398 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
2400 else
2402 gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
2403 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
2406 /* Fix phi expressions in the successor bb. */
2407 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
2411 /* Return a gimple value containing the misalignment (measured in vector
2412 elements) for the loop described by LOOP_VINFO, i.e. how many elements
2413 it is away from a perfectly aligned address. Add any new statements
2414 to SEQ. */
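/* For instance (hypothetical numbers): with a 16-byte target alignment,
4-byte elements and a start address whose low bits are 0xc, we get
misalign_in_bytes = addr & 15 = 12 and misalign_in_elems = 12 >> 2 = 3. */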
2416 static tree
2417 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
2419 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2420 stmt_vec_info stmt_info = dr_info->stmt;
2421 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2423 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2424 unsigned HOST_WIDE_INT target_align_c;
2425 tree target_align_minus_1;
2427 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2428 size_zero_node) < 0;
2429 tree offset = (negative
2430 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2431 * TREE_INT_CST_LOW
2432 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
2433 : size_zero_node);
2434 tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
2435 stmt_info, seq,
2436 offset);
2437 tree type = unsigned_type_for (TREE_TYPE (start_addr));
2438 if (target_align.is_constant (&target_align_c))
2439 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
2440 else
2442 tree vla = build_int_cst (type, target_align);
2443 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
2444 fold_build2 (MINUS_EXPR, type,
2445 build_int_cst (type, 0), vla));
2446 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
2447 build_int_cst (type, 1));
2450 HOST_WIDE_INT elem_size
2451 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2452 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2454 /* Create: misalign_in_bytes = addr & (target_align - 1). */
2455 tree int_start_addr = fold_convert (type, start_addr);
2456 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
2457 target_align_minus_1);
2459 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
2460 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
2461 elem_size_log);
2463 return misalign_in_elems;
2466 /* Function vect_gen_prolog_loop_niters
2468 Generate the number of iterations which should be peeled as prolog for the
2469 loop represented by LOOP_VINFO. It is calculated as the misalignment of
2470 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
2471 As a result, after the execution of this loop, the data reference DR will
2472 refer to an aligned location. The following computation is generated:
2474 If the misalignment of DR is known at compile time:
2475 addr_mis = int mis = DR_MISALIGNMENT (dr);
2476 Else, compute address misalignment in bytes:
2477 addr_mis = addr & (target_align - 1)
2479 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
2481 (elem_size = element type size; an element is the scalar element whose type
2482 is the inner type of the vectype)
2484 The computations will be emitted at the end of BB. We also compute and
2485 store upper bound (included) of the result in BOUND.
2487 When the step of the data-ref in the loop is not 1 (as in interleaved data
2488 and SLP), the number of iterations of the prolog must be divided by the step
2489 (which is equal to the size of interleaved group).
2491 The above formulas assume that VF == number of elements in the vector. This
2492 may not hold when there are multiple-types in the loop.
2493 In this case, for some data-references in the loop the VF does not represent
2494 the number of elements that fit in the vector. Therefore, instead of VF we
2495 use TYPE_VECTOR_SUBPARTS. */
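/* A worked example with made-up numbers: for VF = 4, elem_size = 4, step = 1
and a runtime misalignment of 8 bytes, addr_mis / elem_size = 2 and
prolog_niters = ((4 - 2) & 3) / 1 = 2, i.e. two scalar iterations are
peeled before the access becomes aligned. */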
2497 static tree
2498 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
2499 basic_block bb, int *bound)
2501 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2502 tree var;
2503 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2504 gimple_seq stmts = NULL, new_stmts = NULL;
2505 tree iters, iters_name;
2506 stmt_vec_info stmt_info = dr_info->stmt;
2507 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2508 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
2510 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2512 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2514 if (dump_enabled_p ())
2515 dump_printf_loc (MSG_NOTE, vect_location,
2516 "known peeling = %d.\n", npeel);
2518 iters = build_int_cst (niters_type, npeel);
2519 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2521 else
2523 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
2524 tree type = TREE_TYPE (misalign_in_elems);
2525 HOST_WIDE_INT elem_size
2526 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2527 /* We only do prolog peeling if the target alignment is known at compile
2528 time. */
2529 poly_uint64 align_in_elems =
2530 exact_div (target_align, elem_size);
2531 tree align_in_elems_minus_1 =
2532 build_int_cst (type, align_in_elems - 1);
2533 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
2535 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
2536 & (align_in_elems - 1)). */
2537 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2538 size_zero_node) < 0;
2539 if (negative)
2540 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
2541 align_in_elems_tree);
2542 else
2543 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
2544 misalign_in_elems);
2545 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
2546 iters = fold_convert (niters_type, iters);
2547 unsigned HOST_WIDE_INT align_in_elems_c;
2548 if (align_in_elems.is_constant (&align_in_elems_c))
2549 *bound = align_in_elems_c - 1;
2550 else
2551 *bound = -1;
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_NOTE, vect_location,
2556 "niters for prolog loop: %T\n", iters);
2558 var = create_tmp_var (niters_type, "prolog_loop_niters");
2559 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
2561 if (new_stmts)
2562 gimple_seq_add_seq (&stmts, new_stmts);
2563 if (stmts)
2565 gcc_assert (single_succ_p (bb));
2566 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2567 if (gsi_end_p (gsi))
2568 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2569 else
2570 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2572 return iters_name;
2576 /* Function vect_update_init_of_dr
2578 If CODE is PLUS, the vector loop starts NITERS iterations after the
2579 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
2580 iterations before the scalar one (using masking to skip inactive
2581 elements). This function updates the information recorded in DR to
2582 account for the difference. Specifically, it updates the OFFSET
2583 field of DR_INFO. */
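/* E.g. (hypothetical values): peeling NITERS = 3 iterations off a DR with a
DR_STEP of 4 bytes and CODE PLUS_EXPR bumps the recorded offset by
3 * 4 = 12 bytes, so the vector loop's accesses start after the peeled
data. */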
2585 static void
2586 vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
2588 struct data_reference *dr = dr_info->dr;
2589 tree offset = dr_info->offset;
2590 if (!offset)
2591 offset = build_zero_cst (sizetype);
2593 niters = fold_build2 (MULT_EXPR, sizetype,
2594 fold_convert (sizetype, niters),
2595 fold_convert (sizetype, DR_STEP (dr)));
2596 offset = fold_build2 (code, sizetype,
2597 fold_convert (sizetype, offset), niters);
2598 dr_info->offset = offset;
2602 /* Function vect_update_inits_of_drs
2604 Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
2605 CODE and NITERS are as for vect_update_init_of_dr. */
2607 void
2608 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
2609 tree_code code)
2611 unsigned int i;
2612 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2613 struct data_reference *dr;
2615 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
2617 /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
2618 here, but since we might use these niters to update the epilogue's niters
2619 and data references we can't insert them here as this definition might not
2620 always dominate its uses. */
2621 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
2622 niters = fold_convert (sizetype, niters);
2624 FOR_EACH_VEC_ELT (datarefs, i, dr)
2626 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2627 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
2628 && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
2629 vect_update_init_of_dr (dr_info, niters, code);
2633 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
2634 by masking. This involves calculating the number of iterations to
2635 be peeled and then aligning all memory references appropriately. */
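/* As an illustration (values are made up): with VF = 8 and a known peeling
for alignment of 3 iterations, LOOP_VINFO_MASK_SKIP_NITERS is set to
8 - 3 = 5 and all data-reference offsets are moved back by 5 iterations
via MINUS_EXPR; the first 5 lanes of the first vector iteration are then
inactive. */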
2637 void
2638 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
2640 tree misalign_in_elems;
2641 tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
2643 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
2645 /* From the information recorded in LOOP_VINFO get the number of iterations
2646 that need to be skipped via masking. */
2647 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2649 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2650 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
2651 misalign_in_elems = build_int_cst (type, misalign);
2653 else
2655 gimple_seq seq1 = NULL, seq2 = NULL;
2656 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2657 misalign_in_elems = fold_convert (type, misalign_in_elems);
2658 misalign_in_elems = force_gimple_operand (misalign_in_elems,
2659 &seq2, true, NULL_TREE);
2660 gimple_seq_add_seq (&seq1, seq2);
2661 if (seq1)
2663 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2664 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
2665 gcc_assert (!new_bb);
2669 if (dump_enabled_p ())
2670 dump_printf_loc (MSG_NOTE, vect_location,
2671 "misalignment for fully-masked loop: %T\n",
2672 misalign_in_elems);
2674 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
2676 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2679 /* This function builds ni_name = number of iterations. Statements
2680 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
2681 it to TRUE if new ssa_var is generated. */
2683 tree
2684 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
2686 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2687 if (TREE_CODE (ni) == INTEGER_CST)
2688 return ni;
2689 else
2691 tree ni_name, var;
2692 gimple_seq stmts = NULL;
2693 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2695 var = create_tmp_var (TREE_TYPE (ni), "niters");
2696 ni_name = force_gimple_operand (ni, &stmts, false, var);
2697 if (stmts)
2699 gsi_insert_seq_on_edge_immediate (pe, stmts);
2700 if (new_var_p != NULL)
2701 *new_var_p = true;
2704 return ni_name;
2708 /* Calculate the number of iterations above which the vectorized loop will
2709 be preferred over the scalar loop. NITERS_PROLOG is the number of iterations
2710 of prolog loop. If it's integer const, the integer number is also passed
2711 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
2712 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
2713 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
2714 threshold below which the scalar (rather than vectorized) loop will be
2715 executed. This function stores the upper bound (inclusive) of the result
2716 in BOUND_SCALAR. */
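/* A hypothetical example: with INT_NITERS_PROLOG = 3, BOUND_EPILOG = 7,
TH = 16 and CHECK_PROFITABILITY true, the scalar loop handles at most
MAX (3 + 7, 16 - 1) = 15 iterations, which is stored in *BOUND_SCALAR and
returned as a constant. */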
2718 static tree
2719 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
2720 int bound_prolog, poly_int64 bound_epilog, int th,
2721 poly_uint64 *bound_scalar,
2722 bool check_profitability)
2724 tree type = TREE_TYPE (niters_prolog);
2725 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
2726 build_int_cst (type, bound_epilog));
2728 *bound_scalar = bound_prolog + bound_epilog;
2729 if (check_profitability)
2731 /* TH indicates the minimum niters of vectorized loop, while we
2732 compute the maximum niters of scalar loop. */
2733 th--;
2734 /* Peeling for constant times. */
2735 if (int_niters_prolog >= 0)
2737 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2738 return build_int_cst (type, *bound_scalar);
2740 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
2741 and BOUND_EPILOG are inclusive upper bounds. */
2742 if (known_ge (th, bound_prolog + bound_epilog))
2744 *bound_scalar = th;
2745 return build_int_cst (type, th);
2747 /* Need to do runtime comparison. */
2748 else if (maybe_gt (th, bound_epilog))
2750 *bound_scalar = upper_bound (*bound_scalar, th);
2751 return fold_build2 (MAX_EXPR, type,
2752 build_int_cst (type, th), niters);
2755 return niters;
2758 /* NITERS is the number of times that the original scalar loop executes
2759 after peeling. Work out the maximum number of iterations N that can
2760 be handled by the vectorized form of the loop and then either:
2762 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
2764 niters_vector = N
2766 b) set *STEP_VECTOR_PTR to one and generate:
2768 niters_vector = N / vf
2770 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
2771 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
2772 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
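/* To illustrate with made-up numbers: for NITERS = 20 and a constant VF of 8
without partial vectors, case (b) applies and we generate
niters_vector = 20 >> 3 = 2 with *STEP_VECTOR_PTR = 1; the remaining
iterations are left for the epilog. With a variable VF (or when using
partial vectors), case (a) keeps niters_vector = NITERS and sets
*STEP_VECTOR_PTR to VF. */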
2774 void
2775 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
2776 tree *niters_vector_ptr, tree *step_vector_ptr,
2777 bool niters_no_overflow)
2779 tree ni_minus_gap, var;
2780 tree niters_vector, step_vector, type = TREE_TYPE (niters);
2781 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2782 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2783 tree log_vf = NULL_TREE;
2785 /* If epilogue loop is required because of data accesses with gaps, we
2786 subtract one iteration from the total number of iterations here for
2787 correct calculation of RATIO. */
2788 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2790 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
2791 build_one_cst (type));
2792 if (!is_gimple_val (ni_minus_gap))
2794 var = create_tmp_var (type, "ni_gap");
2795 gimple *stmts = NULL;
2796 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
2797 true, var);
2798 gsi_insert_seq_on_edge_immediate (pe, stmts);
2801 else
2802 ni_minus_gap = niters;
2804 /* To silence some unexpected warnings, simply initialize to 0. */
2805 unsigned HOST_WIDE_INT const_vf = 0;
2806 if (vf.is_constant (&const_vf)
2807 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2809 /* Create: niters >> log2(vf) */
2810 /* If it's known that niters == number of latch executions + 1 doesn't
2811 overflow, we can generate niters >> log2(vf); otherwise we generate
2812 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
2813 will be at least one. */
2814 log_vf = build_int_cst (type, exact_log2 (const_vf));
2815 if (niters_no_overflow)
2816 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
2817 else
2818 niters_vector
2819 = fold_build2 (PLUS_EXPR, type,
2820 fold_build2 (RSHIFT_EXPR, type,
2821 fold_build2 (MINUS_EXPR, type,
2822 ni_minus_gap,
2823 build_int_cst (type, vf)),
2824 log_vf),
2825 build_int_cst (type, 1));
2826 step_vector = build_one_cst (type);
2828 else
2830 niters_vector = ni_minus_gap;
2831 step_vector = build_int_cst (type, vf);
2834 if (!is_gimple_val (niters_vector))
2836 var = create_tmp_var (type, "bnd");
2837 gimple_seq stmts = NULL;
2838 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
2839 gsi_insert_seq_on_edge_immediate (pe, stmts);
2840 /* The peeling algorithm guarantees that the vector loop bound is at least
2841 ONE; we set range information to make the niters analyzer's life easier.
2842 Note the latch iteration count can be TYPE_MAX_VALUE so we have to
2843 represent the vector niter as (TYPE_MAX_VALUE + 1) >> log_vf. */
2844 if (stmts != NULL && log_vf)
2846 if (niters_no_overflow)
2848 value_range vr (type,
2849 wi::one (TYPE_PRECISION (type)),
2850 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2851 TYPE_SIGN (type)),
2852 exact_log2 (const_vf),
2853 TYPE_SIGN (type)));
2854 set_range_info (niters_vector, vr);
2856 /* For VF == 1 the vector IV might also overflow so we cannot
2857 assert a minimum value of 1. */
2858 else if (const_vf > 1)
2860 value_range vr (type,
2861 wi::one (TYPE_PRECISION (type)),
2862 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2863 TYPE_SIGN (type))
2864 - (const_vf - 1),
2865 exact_log2 (const_vf), TYPE_SIGN (type))
2866 + 1);
2867 set_range_info (niters_vector, vr);
2871 *niters_vector_ptr = niters_vector;
2872 *step_vector_ptr = step_vector;
2874 return;
2877 /* Given NITERS_VECTOR which is the number of iterations for vectorized
2878 loop specified by LOOP_VINFO after vectorization, compute the number
2879 of iterations before vectorization (niters_vector * vf) and store it
2880 to NITERS_VECTOR_MULT_VF_PTR. */
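/* For example (illustrative only): with NITERS_VECTOR = 2 and a constant VF
of 8 this computes 2 << 3 = 16; if an early-break vector iteration was
peeled (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED) one full VF is subtracted,
giving 8. */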
2882 static void
2883 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2884 tree niters_vector,
2885 tree *niters_vector_mult_vf_ptr)
2887 /* We should be using a step_vector of VF if VF is variable. */
2888 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
2889 tree type = TREE_TYPE (niters_vector);
2890 tree log_vf = build_int_cst (type, exact_log2 (vf));
2891 tree tree_vf = build_int_cst (type, vf);
2892 basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
2894 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2895 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2896 niters_vector, log_vf);
2898 /* If we've peeled a vector iteration then subtract one full vector
2899 iteration. */
2900 if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
2901 niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
2902 niters_vector_mult_vf, tree_vf);
2904 if (!is_gimple_val (niters_vector_mult_vf))
2906 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2907 gimple_seq stmts = NULL;
2908 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2909 &stmts, true, var);
2910 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2911 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2913 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2916 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2917 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2918 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2919 appear like below:
2921 guard_bb:
2922 if (cond)
2923 goto merge_bb;
2924 else
2925 goto skip_loop;
2927 skip_loop:
2928 header_a:
2929 i_1 = PHI<i_0, i_2>;
2931 i_2 = i_1 + 1;
2932 if (cond_a)
2933 goto latch_a;
2934 else
2935 goto exit_a;
2936 latch_a:
2937 goto header_a;
2939 exit_a:
2940 i_5 = PHI<i_2>;
2942 merge_bb:
2943 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2945 update_loop:
2946 header_b:
2947 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2949 i_4 = i_3 + 1;
2950 if (cond_b)
2951 goto latch_b;
2952 else
2953 goto exit_bb;
2954 latch_b:
2955 goto header_b;
2957 exit_bb:
2959 This function creates PHI nodes at merge_bb and replaces the use of i_5
2960 in the update_loop's PHI node with the result of the new PHI node. */
2962 static void
2963 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
2964 class loop *update_loop,
2965 edge guard_edge, edge merge_edge)
2967 location_t merge_loc, guard_loc;
2968 edge orig_e = loop_preheader_edge (skip_loop);
2969 edge update_e = loop_preheader_edge (update_loop);
2970 gphi_iterator gsi_orig, gsi_update;
2972 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2973 gsi_update = gsi_start_phis (update_loop->header));
2974 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2975 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2977 gphi *orig_phi = gsi_orig.phi ();
2978 gphi *update_phi = gsi_update.phi ();
2980 /* Generate new phi node at merge bb of the guard. */
2981 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2982 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2984 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2985 args in NEW_PHI for these edges. */
2986 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2987 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2988 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2989 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2990 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2991 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2993 /* Update phi in UPDATE_PHI. */
2994 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2998 /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
2999 Return a value that equals:
3001 - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
3002 - SKIP_VALUE when the main loop is skipped. */
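/* In GIMPLE terms the result is just a merge PHI in the destination of
loop_vinfo->main_loop_edge, roughly:

result = PHI <MAIN_LOOP_VALUE (main_loop_edge), SKIP_VALUE (skip_main_loop_edge)> */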
3004 tree
3005 vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
3006 tree skip_value)
3008 gcc_assert (loop_vinfo->main_loop_edge);
3010 tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
3011 basic_block bb = loop_vinfo->main_loop_edge->dest;
3012 gphi *new_phi = create_phi_node (phi_result, bb);
3013 add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
3014 UNKNOWN_LOCATION);
3015 add_phi_arg (new_phi, skip_value,
3016 loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
3017 return phi_result;
3020 /* Function vect_do_peeling.
3022 Input:
3023 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
3025 preheader:
3026 LOOP:
3027 header_bb:
3028 loop_body
3029 if (exit_loop_cond) goto exit_bb
3030 else goto header_bb
3031 exit_bb:
3033 - NITERS: The number of iterations of the loop.
3034 - NITERSM1: The number of iterations of the loop's latch.
3035 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
3036 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
3037 CHECK_PROFITABILITY is true.
3038 Output:
3039 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
3040 iterate after vectorization; see vect_set_loop_condition for details.
3041 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
3042 should be set to the number of scalar iterations handled by the
3043 vector loop. The SSA name is only used on exit from the loop.
3045 This function peels prolog and epilog from the loop, adds guards skipping
3046 PROLOG and EPILOG for various conditions. As a result, the changed CFG
3047 would look like:
3049 guard_bb_1:
3050 if (prefer_scalar_loop) goto merge_bb_1
3051 else goto guard_bb_2
3053 guard_bb_2:
3054 if (skip_prolog) goto merge_bb_2
3055 else goto prolog_preheader
3057 prolog_preheader:
3058 PROLOG:
3059 prolog_header_bb:
3060 prolog_body
3061 if (exit_prolog_cond) goto prolog_exit_bb
3062 else goto prolog_header_bb
3063 prolog_exit_bb:
3065 merge_bb_2:
3067 vector_preheader:
3068 VECTOR LOOP:
3069 vector_header_bb:
3070 vector_body
3071 if (exit_vector_cond) goto vector_exit_bb
3072 else goto vector_header_bb
3073 vector_exit_bb:
3075 guard_bb_3:
3076 if (skip_epilog) goto merge_bb_3
3077 else goto epilog_preheader
3079 merge_bb_1:
3081 epilog_preheader:
3082 EPILOG:
3083 epilog_header_bb:
3084 epilog_body
3085 if (exit_epilog_cond) goto merge_bb_3
3086 else goto epilog_header_bb
3088 merge_bb_3:
3090 Note this function peels prolog and epilog only if it's necessary,
3091 as well as guards.
3092 This function returns the epilogue loop if a decision was made to vectorize
3093 it, otherwise NULL.
3095 The analysis resulting in this epilogue loop's loop_vec_info was performed
3096 in the same vect_analyze_loop call as the main loop's. At that time
3097 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
3098 vectorization factors than the main loop. This list is stored in the main
3099 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
3100 vectorize the epilogue loop for a lower vectorization factor, the
3101 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
3102 updated and linked to the epilogue loop. This is later used to vectorize
3103 the epilogue. The reason the loop_vec_info needs updating is that it was
3104 constructed based on the original main loop, and the epilogue loop is a
3105 copy of this loop, so all links pointing to statements in the original loop
3106 need updating. Furthermore, these loop_vec_infos share the
3107 data_reference's records, which will also need to be updated.
3109 TODO: Guard for prefer_scalar_loop should be emitted along with
3110 versioning conditions if loop versioning is needed. */
3113 class loop *
3114 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
3115 tree *niters_vector, tree *step_vector,
3116 tree *niters_vector_mult_vf_var, int th,
3117 bool check_profitability, bool niters_no_overflow,
3118 tree *advance)
3120 edge e, guard_e;
3121 tree type = TREE_TYPE (niters), guard_cond;
3122 basic_block guard_bb, guard_to;
3123 profile_probability prob_prolog, prob_vector, prob_epilog;
3124 int estimated_vf;
3125 int prolog_peeling = 0;
3126 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
3127 /* We currently do not support prolog peeling if the target alignment is not
3128 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
3129 target alignment being constant. */
3130 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3131 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
3132 return NULL;
3134 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
3135 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3137 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3138 poly_uint64 bound_epilog = 0;
3139 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
3140 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
3141 bound_epilog += vf - 1;
3142 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3143 bound_epilog += 1;
3145 /* For early breaks the scalar loop needs to execute at most VF times
3146 to find the element that caused the break. */
3147 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3148 bound_epilog = vf;
3150 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
3151 poly_uint64 bound_scalar = bound_epilog;
3153 if (!prolog_peeling && !epilog_peeling)
3154 return NULL;
3156 /* Before doing any peeling make sure to reset debug binds outside of
3157 the loop referring to defs not in LC SSA. */
3158 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3159 for (unsigned i = 0; i < loop->num_nodes; ++i)
3161 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
3162 imm_use_iterator ui;
3163 gimple *use_stmt;
3164 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
3165 gsi_next (&gsi))
3167 FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
3168 if (gimple_debug_bind_p (use_stmt)
3169 && loop != gimple_bb (use_stmt)->loop_father
3170 && !flow_loop_nested_p (loop,
3171 gimple_bb (use_stmt)->loop_father))
3173 gimple_debug_bind_reset_value (use_stmt);
3174 update_stmt (use_stmt);
3177 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3178 gsi_next (&gsi))
3180 ssa_op_iter op_iter;
3181 def_operand_p def_p;
3182 FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
3183 FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
3184 if (gimple_debug_bind_p (use_stmt)
3185 && loop != gimple_bb (use_stmt)->loop_father
3186 && !flow_loop_nested_p (loop,
3187 gimple_bb (use_stmt)->loop_father))
3189 gimple_debug_bind_reset_value (use_stmt);
3190 update_stmt (use_stmt);
3195 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
3196 estimated_vf = vect_vf_for_cost (loop_vinfo);
3197 if (estimated_vf == 2)
3198 estimated_vf = 3;
3199 prob_prolog = prob_epilog = profile_probability::guessed_always ()
3200 .apply_scale (estimated_vf - 1, estimated_vf);
3202 class loop *prolog, *epilog = NULL;
3203 class loop *first_loop = loop;
3204 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
3206 /* SSA form needs to be up-to-date since we are going to manually
3207 update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
3208 SSA update state after that, so we have to make sure not to lose any
3209 pending update needs. */
3210 gcc_assert (!need_ssa_update_p (cfun));
3212 /* If we're vectorizing an epilogue loop, we have ensured that the
3213 virtual operand is in SSA form throughout the vectorized main loop.
3214 Normally it is possible to trace the updated
3215 vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
3216 back to scalar-stmt vuses, meaning that the effect of the SSA update
3217 remains local to the main loop. However, there are rare cases in
3218 which the vectorized loop should have vdefs even when the original scalar
3219 loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
3220 introduces clobbers of the temporary vector array, which in turn
3221 needs new vdefs. If the scalar loop doesn't write to memory, these
3222 new vdefs will be the only ones in the vector loop.
3223 We currently defer updating virtual SSA form and creating
3224 a virtual PHI for this case so we do not have to make sure the
3225 newly introduced virtual def is in LCSSA form. */
3227 if (MAY_HAVE_DEBUG_BIND_STMTS)
3229 gcc_assert (!adjust_vec.exists ());
3230 adjust_vec.create (32);
3232 initialize_original_copy_tables ();
3234 /* Record the anchor bb at which the guard should be placed if the scalar
3235 loop might be preferred. */
3236 basic_block anchor = loop_preheader_edge (loop)->src;
3238 /* Generate the number of iterations for the prolog loop. We do this here
3239 so that we can also get the upper bound on the number of iterations. */
3240 tree niters_prolog;
3241 int bound_prolog = 0;
3242 if (prolog_peeling)
3244 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
3245 &bound_prolog);
3246 /* If alignment peeling is known, we will always execute the prolog. */
3247 if (TREE_CODE (niters_prolog) == INTEGER_CST)
3248 prob_prolog = profile_probability::always ();
3250 else
3251 niters_prolog = build_int_cst (type, 0);
3253 loop_vec_info epilogue_vinfo = NULL;
3254 if (vect_epilogues)
3256 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
3257 loop_vinfo->epilogue_vinfos.ordered_remove (0);
3260 tree niters_vector_mult_vf = NULL_TREE;
3261 /* Saving NITERs before the loop, as this may be changed by prologue. */
3262 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
3263 edge update_e = NULL, skip_e = NULL;
3264 unsigned int lowest_vf = constant_lower_bound (vf);
3265 /* Prolog loop may be skipped. */
3266 bool skip_prolog = (prolog_peeling != 0);
3267 /* Skip straight to the epilog, bypassing the vectorized loop, when there
3268 are not enough iterations to enter the vectorized loop. If true we
3269 perform runtime checks on NITERS to decide whether to skip the current
3270 vectorized loop. If we know the number of scalar iterations, we only
3271 add such a check when that number may be smaller than the number of
3272 iterations required to enter this loop; for this we use the upper
3273 bounds on the prolog and epilog peeling. When we don't know the number
3274 of iterations and don't require versioning it is because we have
3275 asserted that there are enough scalar iterations to enter the main
3276 loop, so this skip is not necessary. When we are versioning then we
3277 only add such a skip if we have chosen to vectorize the epilogue. */
3279 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3280 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
3281 bound_prolog + bound_epilog)
3282 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
3283 || vect_epilogues));
3285 /* Epilog loop must be executed if the number of iterations for epilog
3286 loop is known at compile time, otherwise we need to add a check at
3287 the end of vector loop and skip to the end of epilog loop. */
3288 bool skip_epilog = (prolog_peeling < 0
3289 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3290 || !vf.is_constant ());
3291 /* PEELING_FOR_GAPS and peeling for early breaks are special because epilog
3292 loop must be executed. */
3293 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
3294 || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3295 skip_epilog = false;
3297 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3298 auto_vec<profile_count> original_counts;
3299 basic_block *original_bbs = NULL;
3301 if (skip_vector)
3303 split_edge (loop_preheader_edge (loop));
3305 if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
3307 original_bbs = get_loop_body (loop);
3308 for (unsigned int i = 0; i < loop->num_nodes; i++)
3309 original_counts.safe_push (original_bbs[i]->count);
3312 /* Due to the order in which we peel prolog and epilog, we first
3313 propagate probability to the whole loop. The purpose is to
3314 avoid adjusting probabilities of both prolog and vector loops
3315 separately. Note in this case, the probability of epilog loop
3316 needs to be scaled back later. */
3317 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
3318 if (prob_vector.initialized_p ())
3320 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
3321 scale_loop_profile (loop, prob_vector, -1);
3325 if (vect_epilogues)
3327 /* Make sure to set the epilogue's epilogue scalar loop, such that we can
3328 use the original scalar loop as remaining epilogue if necessary. */
3329 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
3330 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3331 LOOP_VINFO_SCALAR_IV_EXIT (epilogue_vinfo)
3332 = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3335 if (prolog_peeling)
3337 e = loop_preheader_edge (loop);
3338 edge exit_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3339 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, exit_e, e)
3340 && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo));
3342 /* Peel prolog and put it on preheader edge of loop. */
3343 edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3344 edge prolog_e = NULL;
3345 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
3346 scalar_loop, scalar_e,
3347 e, &prolog_e);
3348 gcc_assert (prolog);
3349 prolog->force_vectorize = false;
3351 first_loop = prolog;
3352 reset_original_copy_tables ();
3354 /* Update the number of iterations for prolog loop. */
3355 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
3356 vect_set_loop_condition (prolog, prolog_e, NULL, niters_prolog,
3357 step_prolog, NULL_TREE, false);
3359 /* Skip the prolog loop. */
3360 if (skip_prolog)
3362 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3363 niters_prolog, build_int_cst (type, 0));
3364 guard_bb = loop_preheader_edge (prolog)->src;
3365 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
3366 guard_to = split_edge (loop_preheader_edge (loop));
3367 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3368 guard_to, guard_bb,
3369 prob_prolog.invert (),
3370 irred_flag);
3371 for (edge alt_e : get_loop_exit_edges (prolog))
3373 if (alt_e == prolog_e)
3374 continue;
3375 basic_block old_dom
3376 = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
3377 if (flow_bb_inside_loop_p (prolog, old_dom))
3379 auto_vec<basic_block, 8> queue;
3380 for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
3381 son; son = next_dom_son (CDI_DOMINATORS, son))
3382 if (!flow_bb_inside_loop_p (prolog, son))
3383 queue.safe_push (son);
3384 for (auto son : queue)
3385 set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
3389 e = EDGE_PRED (guard_to, 0);
3390 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3391 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3393 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
3394 scale_loop_profile (prolog, prob_prolog, bound_prolog - 1);
3397 /* Update init address of DRs. */
3398 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3399 /* Update niters for vector loop. */
3400 LOOP_VINFO_NITERS (loop_vinfo)
3401 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
3402 LOOP_VINFO_NITERSM1 (loop_vinfo)
3403 = fold_build2 (MINUS_EXPR, type,
3404 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
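/* Editor's note (illustration only): with an original NITERS of 100 and a
   prolog of 3 iterations, NITERS becomes 97 and NITERSM1 becomes 96 here.  */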
3405 bool new_var_p = false;
3406 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
3407 /* It's guaranteed that vector loop bound before vectorization is at
3408 least VF, so set range information for newly generated var. */
3409 if (new_var_p)
3411 value_range vr (type,
3412 wi::to_wide (build_int_cst (type, lowest_vf)),
3413 wi::to_wide (TYPE_MAX_VALUE (type)));
3414 set_range_info (niters, vr);
3417 /* Prolog iterates at most bound_prolog times, latch iterates at
3418 most bound_prolog - 1 times. */
3419 record_niter_bound (prolog, bound_prolog - 1, false, true);
3420 delete_update_ssa ();
3421 adjust_vec_debug_stmts ();
3422 scev_reset ();
3424 basic_block bb_before_epilog = NULL;
3426 if (epilog_peeling)
3428 e = LOOP_VINFO_IV_EXIT (loop_vinfo);
3429 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e, e));
3431 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
3432 said epilog then we should use a copy of the main loop as a starting
3433 point. This loop may have already had some preliminary transformations
3434 to allow for more optimal vectorization, for example if-conversion.
3435 If we are not vectorizing the epilog then we should use the scalar loop
3436 as the transformations mentioned above make little or no sense when not
3437 vectorizing. */
3438 edge scalar_e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo);
3439 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
3440 edge epilog_e = vect_epilogues ? e : scalar_e;
3441 edge new_epilog_e = NULL;
3442 auto_vec<basic_block> doms;
3443 epilog
3444 = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
3445 &new_epilog_e, true, &doms);
3447 LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e;
3448 gcc_assert (epilog);
3449 gcc_assert (new_epilog_e);
3450 epilog->force_vectorize = false;
3451 bb_before_epilog = loop_preheader_edge (epilog)->src;
3453 /* The scalar version of the loop may be preferred. In this case, add a guard
3454 and skip to epilog. Note this only happens when the number of
3455 iterations of loop is unknown at compile time, otherwise this
3456 won't be vectorized. */
3457 if (skip_vector)
3459 /* Additional epilogue iteration is peeled if gap exists. */
3460 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
3461 bound_prolog, bound_epilog,
3462 th, &bound_scalar,
3463 check_profitability);
3464 /* Build guard against NITERSM1 since NITERS may overflow. */
3465 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
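/* Editor's note: NITERS is NITERSM1 + 1, so a loop that runs through the
   whole range of a 32-bit type has NITERS wrap to 0 while NITERSM1 is still
   0xffffffff; guarding on NITERSM1 therefore avoids the overflow mentioned
   above.  */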
3466 guard_bb = anchor;
3467 guard_to = split_edge (loop_preheader_edge (epilog));
3468 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3469 guard_to, guard_bb,
3470 prob_vector.invert (),
3471 irred_flag);
3472 skip_e = guard_e;
3473 e = EDGE_PRED (guard_to, 0);
3474 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3475 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3477 /* Simply propagate profile info from guard_bb to guard_to which is
3478 a merge point of control flow. */
3479 profile_count old_count = guard_to->count;
3480 guard_to->count = guard_bb->count;
3482 /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
3483 if (vect_epilogues || scalar_loop == NULL)
3485 gcc_assert (epilog->num_nodes == loop->num_nodes);
3486 basic_block *bbs = get_loop_body (epilog);
3487 for (unsigned int i = 0; i < epilog->num_nodes; i++)
3489 gcc_assert (get_bb_original (bbs[i]) == original_bbs[i]);
3490 bbs[i]->count = original_counts[i];
3492 free (bbs);
3493 free (original_bbs);
3495 else if (old_count.nonzero_p ())
3496 scale_loop_profile (epilog, guard_to->count.probability_in (old_count), -1);
3498 /* Only need to handle basic block before epilog loop if it's not
3499 the guard_bb, which is the case when skip_vector is true. */
3500 if (guard_bb != bb_before_epilog && single_pred_p (bb_before_epilog))
3501 bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count ();
3502 bb_before_epilog = loop_preheader_edge (epilog)->src;
3505 /* If the loop is peeled a non-zero constant number of times, niters now
3506 refers to orig_niters - prolog_peeling; it won't overflow even if
3507 orig_niters overflows. */
3508 niters_no_overflow |= (prolog_peeling > 0);
3509 vect_gen_vector_loop_niters (loop_vinfo, niters,
3510 niters_vector, step_vector,
3511 niters_no_overflow);
3512 if (!integer_onep (*step_vector))
3514 /* On exit from the loop we will have an easy way of calculating
3515 NITERS_VECTOR / STEP * STEP. Install a dummy definition
3516 until then. */
3517 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
3518 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
3519 *niters_vector_mult_vf_var = niters_vector_mult_vf;
3521 else
3522 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
3523 &niters_vector_mult_vf);
3524 /* Update IVs of original loop as if they were advanced by
3525 niters_vector_mult_vf steps. */
3526 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
3527 update_e = skip_vector ? e : loop_preheader_edge (epilog);
3528 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3529 update_e = single_succ_edge (LOOP_VINFO_IV_EXIT (loop_vinfo)->dest);
3531 /* If we have a peeled vector iteration then all exits are the same, and
3532 so the main exit needs to be treated the same as the alternative exits:
3533 we leave their updates to vectorizable_live_operations. */
3535 if (!LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
3536 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
3537 update_e);
3539 /* If we have a peeled vector iteration we will never skip the epilog loop
3540 and we can simplify the cfg a lot by not doing the edge split. */
3541 if (skip_epilog || LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3543 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3544 niters, niters_vector_mult_vf);
3546 guard_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
3547 edge epilog_e = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3548 guard_to = epilog_e->dest;
3549 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
3550 skip_vector ? anchor : guard_bb,
3551 prob_epilog.invert (),
3552 irred_flag);
3553 doms.safe_push (guard_to);
3554 if (vect_epilogues)
3555 epilogue_vinfo->skip_this_loop_edge = guard_e;
3556 edge main_iv = LOOP_VINFO_IV_EXIT (loop_vinfo);
3557 gphi_iterator gsi2 = gsi_start_phis (main_iv->dest);
3558 for (gphi_iterator gsi = gsi_start_phis (guard_to);
3559 !gsi_end_p (gsi); gsi_next (&gsi))
3561 /* We expect all of the PHIs we have on epilog_e to also be
3562 present on the main loop exit. But sometimes a stray virtual
3563 definition can appear at epilog_e, which we can then take to
3564 be the same on all exits; we removed the LC SSA PHI on the
3565 main exit beforehand, so we don't need to create a loop PHI
3566 for it. */
3567 if (virtual_operand_p (gimple_phi_result (*gsi))
3568 && (gsi_end_p (gsi2)
3569 || !virtual_operand_p (gimple_phi_result (*gsi2))))
3570 add_phi_arg (*gsi,
3571 gimple_phi_arg_def_from_edge (*gsi, epilog_e),
3572 guard_e, UNKNOWN_LOCATION);
3573 else
3575 add_phi_arg (*gsi, gimple_phi_result (*gsi2), guard_e,
3576 UNKNOWN_LOCATION);
3577 gsi_next (&gsi2);
3581 /* Only need to handle basic block before epilog loop if it's not
3582 the guard_bb, which is the case when skip_vector is true. */
3583 if (guard_bb != bb_before_epilog)
3585 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
3587 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
3589 scale_loop_profile (epilog, prob_epilog, -1);
3592 /* Recalculate the dominators after adding the guard edge. */
3593 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3594 iterate_fix_dominators (CDI_DOMINATORS, doms, false);
3596 /* When we do not have a loop-around edge to the epilog we know
3597 the vector loop covered at least VF scalar iterations unless
3598 we have early breaks.
3599 Update any known upper bound with this knowledge. */
3600 if (! skip_vector
3601 && ! LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
3603 if (epilog->any_upper_bound)
3604 epilog->nb_iterations_upper_bound -= lowest_vf;
3605 if (epilog->any_likely_upper_bound)
3606 epilog->nb_iterations_likely_upper_bound -= lowest_vf;
3607 if (epilog->any_estimate)
3608 epilog->nb_iterations_estimate -= lowest_vf;
3611 unsigned HOST_WIDE_INT bound;
3612 if (bound_scalar.is_constant (&bound))
3614 gcc_assert (bound != 0);
3615 /* Adjust the upper bound by the extra peeled vector iteration if we
3616 are an epilogue of a peeled vect loop and not VLA. For VLA the
3617 loop bounds are unknown. */
3618 if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
3619 && vf.is_constant ())
3620 bound += vf.to_constant ();
3621 /* -1 to convert loop iterations to latch iterations. */
3622 record_niter_bound (epilog, bound - 1, false, true);
3623 scale_loop_profile (epilog, profile_probability::always (),
3624 bound - 1);
3627 delete_update_ssa ();
3628 adjust_vec_debug_stmts ();
3629 scev_reset ();
3632 if (vect_epilogues)
3634 epilog->aux = epilogue_vinfo;
3635 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
3636 LOOP_VINFO_IV_EXIT (epilogue_vinfo)
3637 = LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo);
3639 loop_constraint_clear (epilog, LOOP_C_INFINITE);
3641 /* We now must calculate the number of NITERS performed by the previous
3642 loop and EPILOGUE_NITERS to be performed by the epilogue. */
3643 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
3644 niters_prolog, niters_vector_mult_vf);
3646 /* If skip_vector we may skip the previous loop, we insert a phi-node to
3647 determine whether we are coming from the previous vectorized loop
3648 using the update_e edge or the skip_vector basic block using the
3649 skip_e edge. */
3650 if (skip_vector)
3652 gcc_assert (update_e != NULL && skip_e != NULL);
3653 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
3654 update_e->dest);
3655 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
3656 gimple *stmt = gimple_build_assign (new_ssa, niters);
3657 gimple_stmt_iterator gsi;
3658 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
3659 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
3661 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
3662 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
3664 else
3666 gsi = gsi_last_bb (update_e->src);
3667 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3670 niters = new_ssa;
3671 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
3672 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
3673 UNKNOWN_LOCATION);
3674 niters = PHI_RESULT (new_phi);
3675 epilogue_vinfo->main_loop_edge = update_e;
3676 epilogue_vinfo->skip_main_loop_edge = skip_e;
3679 /* Set ADVANCE to the number of iterations performed by the previous
3680 loop and its prologue. */
3681 *advance = niters;
3683 /* Subtract the number of iterations performed by the vectorized loop
3684 from the number of total iterations. */
3685 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
3686 before_loop_niters,
3687 niters);
3689 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
3690 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
3691 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
3692 epilogue_niters,
3693 build_one_cst (TREE_TYPE (epilogue_niters)));
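/* Editor's note (illustration only, continuing the 100-iteration example):
   with a prolog of 3 and a main-loop VF of 8 the vector loop executes 12
   iterations covering 96 scalar ones, so NITERS above is 3 + 96 = 99 and the
   epilogue is left with 100 - 99 = 1 iteration (its NITERSM1 is 0).  */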
3695 /* Decide what to do if the number of epilogue iterations is not
3696 a multiple of the epilogue loop's vectorization factor.
3697 We should have rejected the loop during the analysis phase
3698 if this fails. */
3699 bool res = vect_determine_partial_vectors_and_peeling (epilogue_vinfo);
3700 gcc_assert (res);
3703 adjust_vec.release ();
3704 free_original_copy_tables ();
3706 return vect_epilogues ? epilog : NULL;
3709 /* Function vect_create_cond_for_niters_checks.
3711 Create a conditional expression that represents the run-time checks for
3712 loop's niter. The loop is guaranteed to terminate if the run-time
3713 checks hold.
3715 Input:
3716 COND_EXPR - input conditional expression. New conditions will be chained
3717 with logical AND operation. A NULL *COND_EXPR is treated
3718 as "true".
3719 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
3720 assumptions to be checked.
3722 Output:
3723 COND_EXPR - conditional expression.
3725 The returned COND_EXPR is the conditional expression to be used in the
3726 if statement that controls which version of the loop gets executed at
3727 runtime. */
3729 static void
3730 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3732 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
3734 if (*cond_expr)
3735 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3736 *cond_expr, part_cond_expr);
3737 else
3738 *cond_expr = part_cond_expr;
3741 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3742 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
3744 static void
3745 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
3747 if (*cond_expr)
3748 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3749 *cond_expr, part_cond_expr);
3750 else
3751 *cond_expr = part_cond_expr;
3754 /* Function vect_create_cond_for_align_checks.
3756 Create a conditional expression that represents the alignment checks for
3757 all of data references (array element references) whose alignment must be
3758 checked at runtime.
3760 Input:
3761 COND_EXPR - input conditional expression. New conditions will be chained
3762 with logical AND operation.
3763 LOOP_VINFO - two fields of the loop information are used.
3764 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3765 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3767 Output:
3768 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3769 expression.
3770 The returned value is the conditional expression to be used in the if
3771 statement that controls which version of the loop gets executed at runtime.
3773 The algorithm makes two assumptions:
3774 1) The number of bytes "n" in a vector is a power of 2.
3775 2) An address "a" is aligned if a%n is zero and that this
3776 test can be done as a&(n-1) == 0. For example, for 16
3777 byte vectors the test is a&0xf == 0. */
3779 static void
3780 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3781 tree *cond_expr,
3782 gimple_seq *cond_expr_stmt_list)
3784 const vec<stmt_vec_info> &may_misalign_stmts
3785 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3786 stmt_vec_info stmt_info;
3787 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3788 tree mask_cst;
3789 unsigned int i;
3790 tree int_ptrsize_type;
3791 char tmp_name[20];
3792 tree or_tmp_name = NULL_TREE;
3793 tree and_tmp_name;
3794 gimple *and_stmt;
3795 tree ptrsize_zero;
3796 tree part_cond_expr;
3798 /* Check that mask is one less than a power of 2, i.e., mask is
3799 all zeros followed by all ones. */
3800 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
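/* Editor's note: e.g. MASK 0xf passes the assert (0xf & 0x10 == 0), while a
   non-contiguous mask such as 0xd fails it (0xd & 0xe == 0xc).  */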
3802 int_ptrsize_type = signed_type_for (ptr_type_node);
3804 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
3805 of the first vector of the i'th data reference. */
3807 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3809 gimple_seq new_stmt_list = NULL;
3810 tree addr_base;
3811 tree addr_tmp_name;
3812 tree new_or_tmp_name;
3813 gimple *addr_stmt, *or_stmt;
3814 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3815 bool negative = tree_int_cst_compare
3816 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
3817 tree offset = negative
3818 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
3819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
3820 : size_zero_node;
3822 /* create: addr_tmp = (int)(address_of_first_vector) */
3823 addr_base =
3824 vect_create_addr_base_for_vector_ref (loop_vinfo,
3825 stmt_info, &new_stmt_list,
3826 offset);
3827 if (new_stmt_list != NULL)
3828 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3830 sprintf (tmp_name, "addr2int%d", i);
3831 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3832 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3833 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3835 /* The addresses are ORed together. */
3837 if (or_tmp_name != NULL_TREE)
3839 /* create: or_tmp = or_tmp | addr_tmp */
3840 sprintf (tmp_name, "orptrs%d", i);
3841 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3842 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3843 or_tmp_name, addr_tmp_name);
3844 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3845 or_tmp_name = new_or_tmp_name;
3847 else
3848 or_tmp_name = addr_tmp_name;
3850 } /* end for i */
3852 mask_cst = build_int_cst (int_ptrsize_type, mask);
3854 /* create: and_tmp = or_tmp & mask */
3855 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3857 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3858 or_tmp_name, mask_cst);
3859 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3861 /* Make and_tmp the left operand of the conditional test against zero.
3862 If and_tmp has a nonzero bit then some address is unaligned. */
3863 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3864 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
3865 and_tmp_name, ptrsize_zero);
3866 chain_cond_expr (cond_expr, part_cond_expr);
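/* Editor's note: a minimal, self-contained sketch (not part of this file) of
   the scalar test the function above builds, assuming the vector size N is a
   power of two so that MASK == N - 1.  ORing the addresses first lets a
   single AND test all of them at once: the result is zero iff every address
   has its low bits clear.  The helper name all_addrs_aligned_p is purely
   illustrative.  */

static bool
all_addrs_aligned_p (const unsigned long *addrs, unsigned int n,
		     unsigned long mask)
{
  unsigned long ored = 0;
  for (unsigned int i = 0; i < n; i++)
    ored |= addrs[i];
  /* Mirrors the (or_tmp & mask) == 0 comparison built above; e.g. for
     16-byte vectors MASK is 0xf and any misaligned address leaves a
     nonzero low bit in ORED.  */
  return (ored & mask) == 0;
}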
3869 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
3870 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
3871 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3872 and this new condition are true. Treat a null *COND_EXPR as "true". */
3874 static void
3875 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
3877 const vec<vec_object_pair> &pairs
3878 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3879 unsigned int i;
3880 vec_object_pair *pair;
3881 FOR_EACH_VEC_ELT (pairs, i, pair)
3883 tree addr1 = build_fold_addr_expr (pair->first);
3884 tree addr2 = build_fold_addr_expr (pair->second);
3885 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
3886 addr1, addr2);
3887 chain_cond_expr (cond_expr, part_cond_expr);
3891 /* Create an expression that is true when all lower-bound conditions for
3892 the vectorized loop are met. Chain this condition with *COND_EXPR. */
3894 static void
3895 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
3897 const vec<vec_lower_bound> &lower_bounds
3898 = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3899 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3901 tree expr = lower_bounds[i].expr;
3902 tree type = unsigned_type_for (TREE_TYPE (expr));
3903 expr = fold_convert (type, expr);
3904 poly_uint64 bound = lower_bounds[i].min_value;
3905 if (!lower_bounds[i].unsigned_p)
3907 expr = fold_build2 (PLUS_EXPR, type, expr,
3908 build_int_cstu (type, bound - 1));
3909 bound += bound - 1;
3911 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
3912 build_int_cstu (type, bound));
3913 chain_cond_expr (cond_expr, part_cond_expr);
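/* Editor's note: a self-contained sketch (not part of this file) of the
   signed-value handling above, on plain integers.  Adding BOUND - 1 before
   an unsigned comparison against 2 * BOUND - 1 accepts exactly the values
   whose magnitude is at least BOUND (for bounds much smaller than the type's
   range): for BOUND == 4, -4 biases to ~0UL and passes, while -3 .. 3 bias
   to values below 7 and fail.  The helper name lower_bound_ok_p is purely
   illustrative.  */

static bool
lower_bound_ok_p (long expr, unsigned long bound)
{
  /* Mirrors the transformation above: expr = (unsigned) expr + (bound - 1);
     bound += bound - 1; then test expr >= bound.  */
  unsigned long biased = (unsigned long) expr + (bound - 1);
  return biased >= 2 * bound - 1;
}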
3917 /* Function vect_create_cond_for_alias_checks.
3919 Create a conditional expression that represents the run-time checks for
3920 overlapping of address ranges represented by a list of data references
3921 relations passed as input.
3923 Input:
3924 COND_EXPR - input conditional expression. New conditions will be chained
3925 with logical AND operation. If it is NULL, then the function
3926 is used to return the number of alias checks.
3927 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
3928 to be checked.
3930 Output:
3931 COND_EXPR - conditional expression.
3933 The returned COND_EXPR is the conditional expression to be used in the if
3934 statement that controls which version of the loop gets executed at runtime.
3937 void
3938 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
3940 const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
3941 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3943 if (comp_alias_ddrs.is_empty ())
3944 return;
3946 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
3947 &comp_alias_ddrs, cond_expr);
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE, vect_location,
3950 "created %u versioning for alias checks.\n",
3951 comp_alias_ddrs.length ());
3955 /* Function vect_loop_versioning.
3957 If the loop has data references that may or may not be aligned and/or
3958 has data reference relations whose independence was not proven then
3959 two versions of the loop need to be generated, one which is vectorized
3960 and one which isn't. A test is then generated to control which of the
3961 loops is executed. The test checks for the alignment of all of the
3962 data references that may or may not be aligned. An additional
3963 sequence of runtime tests is generated for each pair of DDRs whose
3964 independence was not proven. The vectorized version of the loop is
3965 executed only if both alias and alignment tests are passed.
3967 The test generated to check which version of loop is executed
3968 is modified to also check for profitability as indicated by the
3969 cost model threshold TH.
3971 The versioning precondition(s) are placed in *COND_EXPR and
3972 *COND_EXPR_STMT_LIST. */
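/* Editor's note: a sketch (not part of this file) of the control flow this
   function produces, where COND stands for the chained runtime checks
   described above:

       if (COND)
	 vectorized version of the loop;
       else
	 scalar version of the loop;

   with the cost-model check possibly split off into its own block, as done
   further below.  */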
3974 class loop *
3975 vect_loop_versioning (loop_vec_info loop_vinfo,
3976 gimple *loop_vectorized_call)
3978 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
3979 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3980 basic_block condition_bb;
3981 gphi_iterator gsi;
3982 gimple_stmt_iterator cond_exp_gsi;
3983 basic_block merge_bb;
3984 basic_block new_exit_bb;
3985 edge new_exit_e, e;
3986 gphi *orig_phi, *new_phi;
3987 tree cond_expr = NULL_TREE;
3988 gimple_seq cond_expr_stmt_list = NULL;
3989 tree arg;
3990 profile_probability prob = profile_probability::likely ();
3991 gimple_seq gimplify_stmt_list = NULL;
3992 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
3993 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3994 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
3995 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
3996 poly_uint64 versioning_threshold
3997 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3998 tree version_simd_if_cond
3999 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
4000 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
4002 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
4003 && !ordered_p (th, versioning_threshold))
4004 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
4005 build_int_cst (TREE_TYPE (scalar_loop_iters),
4006 th - 1));
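/* Editor's note: SCALAR_LOOP_ITERS is NITERSM1, so with TH == 16 (a
   hypothetical threshold) the test built above is NITERSM1 >= 15, which
   holds exactly when the scalar iteration count is at least 16.  */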
4007 if (maybe_ne (versioning_threshold, 0U))
4009 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
4010 build_int_cst (TREE_TYPE (scalar_loop_iters),
4011 versioning_threshold - 1));
4012 if (cond_expr)
4013 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
4014 expr, cond_expr);
4015 else
4016 cond_expr = expr;
4019 tree cost_name = NULL_TREE;
4020 profile_probability prob2 = profile_probability::always ();
4021 if (cond_expr
4022 && EXPR_P (cond_expr)
4023 && (version_niter
4024 || version_align
4025 || version_alias
4026 || version_simd_if_cond))
4028 cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4029 &cond_expr_stmt_list,
4030 is_gimple_val, NULL_TREE);
4031 /* Split PROB into two so that the overall probability of passing
4032 both the cost-model and versioning checks is the original PROB. */
4033 prob2 = prob = prob.sqrt ();
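/* Editor's note: e.g. a combined probability of 0.81 splits into 0.9 for
   each of the two checks, since 0.9 * 0.9 == 0.81.  */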
4036 if (version_niter)
4037 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
4039 if (cond_expr)
4041 gimple_seq tem = NULL;
4042 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4043 &tem, is_gimple_condexpr_for_cond,
4044 NULL_TREE);
4045 gimple_seq_add_seq (&cond_expr_stmt_list, tem);
4048 if (version_align)
4049 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
4050 &cond_expr_stmt_list);
4052 if (version_alias)
4054 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
4055 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
4056 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
4059 if (version_simd_if_cond)
4061 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
4062 if (flag_checking)
4063 if (basic_block bb
4064 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
4065 gcc_assert (bb != loop->header
4066 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
4067 && (scalar_loop == NULL
4068 || (bb != scalar_loop->header
4069 && dominated_by_p (CDI_DOMINATORS,
4070 scalar_loop->header, bb))));
4071 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
4072 tree c = fold_build2 (NE_EXPR, boolean_type_node,
4073 version_simd_if_cond, zero);
4074 if (cond_expr)
4075 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
4076 c, cond_expr);
4077 else
4078 cond_expr = c;
4079 if (dump_enabled_p ())
4080 dump_printf_loc (MSG_NOTE, vect_location,
4081 "created versioning for simd if condition check.\n");
4084 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
4085 &gimplify_stmt_list,
4086 is_gimple_condexpr_for_cond, NULL_TREE);
4087 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
4089 /* Compute the outermost loop cond_expr and cond_expr_stmt_list are
4090 invariant in. */
4091 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
4092 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
4093 !gsi_end_p (gsi); gsi_next (&gsi))
4095 gimple *stmt = gsi_stmt (gsi);
4096 update_stmt (stmt);
4097 ssa_op_iter iter;
4098 use_operand_p use_p;
4099 basic_block def_bb;
4100 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
4101 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
4102 && flow_bb_inside_loop_p (outermost, def_bb))
4103 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
4106 /* Search for the outermost loop we can version. Avoid versioning of
4107 non-perfect nests but allow if-conversion versioned loops inside. */
4108 class loop *loop_to_version = loop;
4109 if (flow_loop_nested_p (outermost, loop))
4111 if (dump_enabled_p ())
4112 dump_printf_loc (MSG_NOTE, vect_location,
4113 "trying to apply versioning to outer loop %d\n",
4114 outermost->num);
4115 if (outermost->num == 0)
4116 outermost = superloop_at_depth (loop, 1);
4117 /* And avoid applying versioning on non-perfect nests. */
4118 while (loop_to_version != outermost
4119 && (e = single_exit (loop_outer (loop_to_version)))
4120 && !(e->flags & EDGE_COMPLEX)
4121 && (!loop_outer (loop_to_version)->inner->next
4122 || vect_loop_vectorized_call (loop_to_version))
4123 && (!loop_outer (loop_to_version)->inner->next
4124 || !loop_outer (loop_to_version)->inner->next->next))
4125 loop_to_version = loop_outer (loop_to_version);
4128 /* Apply versioning. If there is already a scalar version created by
4129 if-conversion re-use that. Note we cannot re-use the copy of
4130 an if-converted outer-loop when vectorizing the inner loop only. */
4131 gcond *cond;
4132 if ((!loop_to_version->inner || loop == loop_to_version)
4133 && loop_vectorized_call)
4135 gcc_assert (scalar_loop);
4136 condition_bb = gimple_bb (loop_vectorized_call);
4137 cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
4138 gimple_cond_set_condition_from_tree (cond, cond_expr);
4139 update_stmt (cond);
4141 if (cond_expr_stmt_list)
4143 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
4144 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4145 GSI_SAME_STMT);
4148 /* if-conversion uses profile_probability::always () for both paths,
4149 reset the paths probabilities appropriately. */
4150 edge te, fe;
4151 extract_true_false_edges_from_block (condition_bb, &te, &fe);
4152 te->probability = prob;
4153 fe->probability = prob.invert ();
4154 /* We can scale loop counts immediately but have to postpone
4155 scaling the scalar loop because we re-use it during peeling.
4157 Ifcvt duplicates the loop preheader and loop body and produces a
4158 basic block after the loop exit. We need to scale all of that. */
4159 basic_block preheader = loop_preheader_edge (loop_to_version)->src;
4160 preheader->count = preheader->count.apply_probability (prob * prob2);
4161 scale_loop_frequencies (loop_to_version, prob * prob2);
4162 /* When the loop has multiple exits then we can only version itself.
4163 This is denoted by loop_to_version == loop. In this case we can
4164 do the versioning by selecting the exit edge the vectorizer is
4165 currently using. */
4166 edge exit_edge;
4167 if (loop_to_version == loop)
4168 exit_edge = LOOP_VINFO_IV_EXIT (loop_vinfo);
4169 else
4170 exit_edge = single_exit (loop_to_version);
4171 exit_edge->dest->count = preheader->count;
4172 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = (prob * prob2).invert ();
4174 nloop = scalar_loop;
4175 if (dump_enabled_p ())
4176 dump_printf_loc (MSG_NOTE, vect_location,
4177 "reusing %sloop version created by if conversion\n",
4178 loop_to_version != loop ? "outer " : "");
4180 else
4182 if (loop_to_version != loop
4183 && dump_enabled_p ())
4184 dump_printf_loc (MSG_NOTE, vect_location,
4185 "applying loop versioning to outer loop %d\n",
4186 loop_to_version->num);
4188 unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
4190 initialize_original_copy_tables ();
4191 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
4192 prob * prob2, (prob * prob2).invert (),
4193 prob * prob2, (prob * prob2).invert (),
4194 true);
4195 /* We will later insert second conditional so overall outcome of
4196 both is prob * prob2. */
4197 edge true_e, false_e;
4198 extract_true_false_edges_from_block (condition_bb, &true_e, &false_e);
4199 true_e->probability = prob;
4200 false_e->probability = prob.invert ();
4201 gcc_assert (nloop);
4202 nloop = get_loop_copy (loop);
4204 /* For cycle vectorization with SLP we rely on the PHI arguments
4205 appearing in the same order as the SLP node operands which for the
4206 loop PHI nodes means the preheader edge dest index needs to remain
4207 the same for the analyzed loop which also becomes the vectorized one.
4208 Make it so in case the state after versioning differs by redirecting
4209 the first edge into the header to the same destination which moves
4210 it last. */
4211 if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
4213 edge e = EDGE_PRED (loop->header, 0);
4214 ssa_redirect_edge (e, e->dest);
4215 flush_pending_stmts (e);
4217 gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
4219 /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
4220 reap those otherwise; they also refer to the original
4221 loops. */
4222 class loop *l = loop;
4223 while (gimple *call = vect_loop_vectorized_call (l))
4225 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
4226 fold_loop_internal_call (call, boolean_false_node);
4227 l = loop_outer (l);
4229 free_original_copy_tables ();
4231 if (cond_expr_stmt_list)
4233 cond_exp_gsi = gsi_last_bb (condition_bb);
4234 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
4235 GSI_SAME_STMT);
4238 /* Loop versioning violates an assumption we try to maintain during
4239 vectorization - that the loop exit block has a single predecessor.
4240 After versioning, the exit block of both loop versions is the same
4241 basic block (i.e. it has two predecessors). Just in order to simplify
4242 following transformations in the vectorizer, we fix this situation
4243 here by adding a new (empty) block on the exit-edge of the loop,
4244 with the proper loop-exit phis to maintain loop-closed-form.
4245 If loop versioning wasn't done from loop, but scalar_loop instead,
4246 merge_bb will already have just a single predecessor. */
4248 /* When the loop has multiple exits then we can only version itself.
4249 This is denoted by loop_to_version == loop. In this case we can
4250 do the versioning by selecting the exit edge the vectorizer is
4251 currently using. */
4252 edge exit_edge;
4253 if (loop_to_version == loop)
4254 exit_edge = LOOP_VINFO_IV_EXIT (loop_vinfo);
4255 else
4256 exit_edge = single_exit (loop_to_version);
4258 gcc_assert (exit_edge);
4259 merge_bb = exit_edge->dest;
4260 if (EDGE_COUNT (merge_bb->preds) >= 2)
4262 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
4263 new_exit_bb = split_edge (exit_edge);
4264 new_exit_e = exit_edge;
4265 e = EDGE_SUCC (new_exit_bb, 0);
4267 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
4268 gsi_next (&gsi))
4270 tree new_res;
4271 orig_phi = gsi.phi ();
4272 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
4273 new_phi = create_phi_node (new_res, new_exit_bb);
4274 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4275 add_phi_arg (new_phi, arg, new_exit_e,
4276 gimple_phi_arg_location_from_edge (orig_phi, e));
4277 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
4281 update_ssa (TODO_update_ssa_no_phi);
4284 /* Split the cost model check off to a separate BB. Costing assumes
4285 this is the only thing we perform when we enter the scalar loop
4286 from a failed cost decision. */
4287 if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
4289 gimple *def = SSA_NAME_DEF_STMT (cost_name);
4290 gcc_assert (gimple_bb (def) == condition_bb);
4291 /* All uses of the cost check are 'true' after the check we
4292 are going to insert. */
4293 replace_uses_by (cost_name, boolean_true_node);
4294 /* And we're going to build the new single use of it. */
4295 gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
4296 NULL_TREE, NULL_TREE);
4297 edge e = split_block (gimple_bb (def), def);
4298 gimple_stmt_iterator gsi = gsi_for_stmt (def);
4299 gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
4300 edge true_e, false_e;
4301 extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
4302 e->flags &= ~EDGE_FALLTHRU;
4303 e->flags |= EDGE_TRUE_VALUE;
4304 edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
4305 e->probability = prob2;
4306 e2->probability = prob2.invert ();
4307 e->dest->count = e->count ();
4308 set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
4309 auto_vec<basic_block, 3> adj;
4310 for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
4311 son;
4312 son = next_dom_son (CDI_DOMINATORS, son))
4313 if (EDGE_COUNT (son->preds) > 1)
4314 adj.safe_push (son);
4315 for (auto son : adj)
4316 set_immediate_dominator (CDI_DOMINATORS, son, e->src);
4317 //debug_bb (condition_bb);
4318 //debug_bb (e->src);
4321 if (version_niter)
4323 /* The versioned loop could be infinite, we need to clear existing
4324 niter information which is copied from the original loop. */
4325 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
4326 vect_free_loop_info_assumptions (nloop);
4329 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
4330 && dump_enabled_p ())
4332 if (version_alias)
4333 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4334 vect_location,
4335 "loop versioned for vectorization because of "
4336 "possible aliasing\n");
4337 if (version_align)
4338 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
4339 vect_location,
4340 "loop versioned for vectorization to enhance "
4341 "alignment\n");
4345 return nloop;