gcc/tree-vect-loop-manip.cc
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
44 #include "gimple-fold.h"
45 #include "tree-ssa-loop-niter.h"
46 #include "internal-fn.h"
47 #include "stor-layout.h"
48 #include "optabs-query.h"
49 #include "vec-perm-indices.h"
50 #include "insn-config.h"
51 #include "rtl.h"
52 #include "recog.h"
54 /*************************************************************************
55 Simple Loop Peeling Utilities
57 Utilities to support loop peeling for vectorization purposes.
58 *************************************************************************/
61 /* Renames the use *OP_P. */
63 static void
64 rename_use_op (use_operand_p op_p)
66 tree new_name;
68 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
69 return;
71 new_name = get_current_def (USE_FROM_PTR (op_p));
73 /* Something defined outside of the loop. */
74 if (!new_name)
75 return;
77 /* An ordinary ssa name defined in the loop. */
79 SET_USE (op_p, new_name);
83 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
84 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
85 true. */
87 static void
88 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
90 gimple *stmt;
91 use_operand_p use_p;
92 ssa_op_iter iter;
93 edge e;
94 edge_iterator ei;
95 class loop *loop = bb->loop_father;
96 class loop *outer_loop = NULL;
98 if (rename_from_outer_loop)
100 gcc_assert (loop);
101 outer_loop = loop_outer (loop);
104 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
105 gsi_next (&gsi))
107 stmt = gsi_stmt (gsi);
108 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
109 rename_use_op (use_p);
112 FOR_EACH_EDGE (e, ei, bb->preds)
114 if (!flow_bb_inside_loop_p (loop, e->src))
116 if (!rename_from_outer_loop)
117 continue;
118 if (e->src != outer_loop->header)
120 if (outer_loop->inner->next)
122 /* If outer_loop has 2 inner loops, allow there to
123 be an extra basic block which decides which of the
124 two loops to use, using LOOP_VECTORIZED. */
125 if (!single_pred_p (e->src)
126 || single_pred (e->src) != outer_loop->header)
127 continue;
131 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
132 gsi_next (&gsi))
133 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
138 struct adjust_info
140 tree from, to;
141 basic_block bb;
144 /* A stack of values to be adjusted in debug stmts. We have to
145 process them LIFO, so that the closest substitution applies. If we
146 processed them FIFO, without the stack, we might substitute uses
147 with a PHI DEF that would soon become non-dominant, and when we got
148 to the suitable one, it wouldn't have anything to substitute any
149 more. */
150 static vec<adjust_info, va_heap> adjust_vec;
152 /* Adjust any debug stmts that referenced AI->from values to use the
153 loop-closed AI->to, if the references are dominated by AI->bb and
154 not by the definition of AI->from. */
156 static void
157 adjust_debug_stmts_now (adjust_info *ai)
159 basic_block bbphi = ai->bb;
160 tree orig_def = ai->from;
161 tree new_def = ai->to;
162 imm_use_iterator imm_iter;
163 gimple *stmt;
164 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
166 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
168 /* Adjust any debug stmts that held onto non-loop-closed
169 references. */
170 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
172 use_operand_p use_p;
173 basic_block bbuse;
175 if (!is_gimple_debug (stmt))
176 continue;
178 gcc_assert (gimple_debug_bind_p (stmt));
180 bbuse = gimple_bb (stmt);
182 if ((bbuse == bbphi
183 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
184 && !(bbuse == bbdef
185 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
187 if (new_def)
188 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
189 SET_USE (use_p, new_def);
190 else
192 gimple_debug_bind_reset_value (stmt);
193 update_stmt (stmt);
199 /* Adjust debug stmts as scheduled before. */
201 static void
202 adjust_vec_debug_stmts (void)
204 if (!MAY_HAVE_DEBUG_BIND_STMTS)
205 return;
207 gcc_assert (adjust_vec.exists ());
209 while (!adjust_vec.is_empty ())
211 adjust_debug_stmts_now (&adjust_vec.last ());
212 adjust_vec.pop ();
216 /* Adjust any debug stmts that referenced FROM values to use the
217 loop-closed TO, if the references are dominated by BB and not by
218 the definition of FROM. If adjust_vec is non-NULL, adjustments
219 will be postponed until adjust_vec_debug_stmts is called. */
221 static void
222 adjust_debug_stmts (tree from, tree to, basic_block bb)
224 adjust_info ai;
226 if (MAY_HAVE_DEBUG_BIND_STMTS
227 && TREE_CODE (from) == SSA_NAME
228 && ! SSA_NAME_IS_DEFAULT_DEF (from)
229 && ! virtual_operand_p (from))
231 ai.from = from;
232 ai.to = to;
233 ai.bb = bb;
235 if (adjust_vec.exists ())
236 adjust_vec.safe_push (ai);
237 else
238 adjust_debug_stmts_now (&ai);
242 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
243 to adjust any debug stmts that referenced the old phi arg,
244 presumably non-loop-closed references left over from other
245 transformations. */
247 static void
248 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
250 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
252 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
254 if (MAY_HAVE_DEBUG_BIND_STMTS)
255 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
256 gimple_bb (update_phi));
259 /* Define one loop rgroup control CTRL from loop LOOP. INIT_CTRL is the value
260 that the control should have during the first iteration and NEXT_CTRL is the
261 value that it should have on subsequent iterations. */
263 static void
264 vect_set_loop_control (class loop *loop, tree ctrl, tree init_ctrl,
265 tree next_ctrl)
267 gphi *phi = create_phi_node (ctrl, loop->header);
268 add_phi_arg (phi, init_ctrl, loop_preheader_edge (loop), UNKNOWN_LOCATION);
269 add_phi_arg (phi, next_ctrl, loop_latch_edge (loop), UNKNOWN_LOCATION);
272 /* Add SEQ to the end of LOOP's preheader block. */
274 static void
275 add_preheader_seq (class loop *loop, gimple_seq seq)
277 if (seq)
279 edge pe = loop_preheader_edge (loop);
280 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
281 gcc_assert (!new_bb);
285 /* Add SEQ to the beginning of LOOP's header block. */
287 static void
288 add_header_seq (class loop *loop, gimple_seq seq)
290 if (seq)
292 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
293 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
297 /* Return true if the target can interleave elements of two vectors.
298 OFFSET is 0 if the first half of the vectors should be interleaved
299 or 1 if the second half should. When returning true, store the
300 associated permutation in INDICES. */
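/* Illustrative example (vector size chosen arbitrarily): for 8-element
   vectors, OFFSET == 0 builds the interleave-low selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } and OFFSET == 1 the interleave-high
   selector { 4, 12, 5, 13, 6, 14, 7, 15 }.  */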
302 static bool
303 interleave_supported_p (vec_perm_indices *indices, tree vectype,
304 unsigned int offset)
306 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
307 poly_uint64 base = exact_div (nelts, 2) * offset;
308 vec_perm_builder sel (nelts, 2, 3);
309 for (unsigned int i = 0; i < 3; ++i)
311 sel.quick_push (base + i);
312 sel.quick_push (base + i + nelts);
314 indices->new_vector (sel, 2, nelts);
315 return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
316 *indices);
319 /* Try to use permutes to define the masks in DEST_RGM using the masks
320 in SRC_RGM, given that the former has twice as many masks as the
321 latter. Return true on success, adding any new statements to SEQ. */
323 static bool
324 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm,
325 rgroup_controls *src_rgm)
327 tree src_masktype = src_rgm->type;
328 tree dest_masktype = dest_rgm->type;
329 machine_mode src_mode = TYPE_MODE (src_masktype);
330 insn_code icode1, icode2;
331 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
332 && (icode1 = optab_handler (vec_unpacku_hi_optab,
333 src_mode)) != CODE_FOR_nothing
334 && (icode2 = optab_handler (vec_unpacku_lo_optab,
335 src_mode)) != CODE_FOR_nothing)
337 /* Unpacking the source masks gives at least as many mask bits as
338 we need. We can then VIEW_CONVERT any excess bits away. */
339 machine_mode dest_mode = insn_data[icode1].operand[0].mode;
340 gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
341 tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
342 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
344 tree src = src_rgm->controls[i / 2];
345 tree dest = dest_rgm->controls[i];
346 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
347 ? VEC_UNPACK_HI_EXPR
348 : VEC_UNPACK_LO_EXPR);
349 gassign *stmt;
350 if (dest_masktype == unpack_masktype)
351 stmt = gimple_build_assign (dest, code, src);
352 else
354 tree temp = make_ssa_name (unpack_masktype);
355 stmt = gimple_build_assign (temp, code, src);
356 gimple_seq_add_stmt (seq, stmt);
357 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
358 build1 (VIEW_CONVERT_EXPR,
359 dest_masktype, temp));
361 gimple_seq_add_stmt (seq, stmt);
363 return true;
365 vec_perm_indices indices[2];
366 if (dest_masktype == src_masktype
367 && interleave_supported_p (&indices[0], src_masktype, 0)
368 && interleave_supported_p (&indices[1], src_masktype, 1))
370 /* The destination requires twice as many mask bits as the source, so
371 we can use interleaving permutes to double up the number of bits. */
372 tree masks[2];
373 for (unsigned int i = 0; i < 2; ++i)
374 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
375 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
377 tree src = src_rgm->controls[i / 2];
378 tree dest = dest_rgm->controls[i];
379 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
380 src, src, masks[i & 1]);
381 gimple_seq_add_stmt (seq, stmt);
383 return true;
385 return false;
388 /* Populate DEST_RGM->controls, given that they should add up to STEP.
390 STEP = MIN_EXPR <ivtmp_34, VF>;
392 First length (MIN (X, VF/N)):
393 loop_len_15 = MIN_EXPR <STEP, VF/N>;
395 Second length:
396 tmp = STEP - loop_len_15;
397 loop_len_16 = MIN (tmp, VF/N);
399 Third length:
400 tmp2 = tmp - loop_len_16;
401 loop_len_17 = MIN (tmp2, VF/N);
403 Last length:
404 loop_len_18 = tmp2 - loop_len_17;
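/* For illustration (arbitrary numbers): with VF/N == 4 and STEP == 10,
   a four-control group gets the lengths 4, 4, 2 and 0, which sum back
   to STEP.  */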
407 static void
408 vect_adjust_loop_lens_control (tree iv_type, gimple_seq *seq,
409 rgroup_controls *dest_rgm, tree step)
411 tree ctrl_type = dest_rgm->type;
412 poly_uint64 nitems_per_ctrl
413 = TYPE_VECTOR_SUBPARTS (ctrl_type) * dest_rgm->factor;
414 tree length_limit = build_int_cst (iv_type, nitems_per_ctrl);
416 for (unsigned int i = 0; i < dest_rgm->controls.length (); ++i)
418 tree ctrl = dest_rgm->controls[i];
419 if (i == 0)
421 /* First iteration: MIN (X, VF/N) capped to the range [0, VF/N]. */
422 gassign *assign
423 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
424 gimple_seq_add_stmt (seq, assign);
426 else if (i == dest_rgm->controls.length () - 1)
428 /* Last iteration: the remainder, capped to the range [0, VF/N]. */
429 gassign *assign = gimple_build_assign (ctrl, MINUS_EXPR, step,
430 dest_rgm->controls[i - 1]);
431 gimple_seq_add_stmt (seq, assign);
433 else
435 /* (MIN (remain, VF*I/N)) capped to the range [0, VF/N]. */
436 step = gimple_build (seq, MINUS_EXPR, iv_type, step,
437 dest_rgm->controls[i - 1]);
438 gassign *assign
439 = gimple_build_assign (ctrl, MIN_EXPR, step, length_limit);
440 gimple_seq_add_stmt (seq, assign);
445 /* Helper for vect_set_loop_condition_partial_vectors. Generate definitions
446 for all the rgroup controls in RGC and return a control that is nonzero
447 when the loop needs to iterate. Add any new preheader statements to
448 PREHEADER_SEQ. Use LOOP_COND_GSI to insert code before the exit gcond.
450 RGC belongs to loop LOOP. The loop originally iterated NITERS
451 times and has been vectorized according to LOOP_VINFO.
453 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
454 starts with NITERS_SKIP dummy iterations of the scalar loop before
455 the real work starts. The mask elements for these dummy iterations
456 must be 0, to ensure that the extra iterations do not have an effect.
458 It is known that:
460 NITERS * RGC->max_nscalars_per_iter * RGC->factor
462 does not overflow. However, MIGHT_WRAP_P says whether an induction
463 variable that starts at 0 and has step:
465 VF * RGC->max_nscalars_per_iter * RGC->factor
467 might overflow before hitting a value above:
469 (NITERS + NITERS_SKIP) * RGC->max_nscalars_per_iter * RGC->factor
471 This means that we cannot guarantee that such an induction variable
472 would ever hit a value that produces a set of all-false masks or zero
473 lengths for RGC.
475 Note: the cost of the code generated by this function is modeled
476 by vect_estimate_min_profitable_iters, so changes here may need
477 corresponding changes there. */
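/* As a hypothetical illustration of the wrapping concern: with a 16-bit
   IV type, a step of 16 and a limit of 65530, the IV takes the values
   0, 16, ..., 65520 and then wraps to 0 without ever reaching the
   limit, so MIGHT_WRAP_P would be true.  */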
479 static tree
480 vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
481 gimple_seq *preheader_seq,
482 gimple_seq *header_seq,
483 gimple_stmt_iterator loop_cond_gsi,
484 rgroup_controls *rgc, tree niters,
485 tree niters_skip, bool might_wrap_p,
486 tree *iv_step, tree *compare_step)
488 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
489 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
490 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
492 tree ctrl_type = rgc->type;
493 unsigned int nitems_per_iter = rgc->max_nscalars_per_iter * rgc->factor;
494 poly_uint64 nitems_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type) * rgc->factor;
495 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
496 tree length_limit = NULL_TREE;
497 /* For lengths, we need length_limit to keep each length in range. */
498 if (!use_masks_p)
499 length_limit = build_int_cst (compare_type, nitems_per_ctrl);
501 /* Calculate the maximum number of item values that the rgroup
502 handles in total, the number that it handles for each iteration
503 of the vector loop, and the number that it should skip during the
504 first iteration of the vector loop. */
505 tree nitems_total = niters;
506 tree nitems_step = build_int_cst (iv_type, vf);
507 tree nitems_skip = niters_skip;
508 if (nitems_per_iter != 1)
510 /* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P that
511 these multiplications don't overflow. */
512 tree compare_factor = build_int_cst (compare_type, nitems_per_iter);
513 tree iv_factor = build_int_cst (iv_type, nitems_per_iter);
514 nitems_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
515 nitems_total, compare_factor);
516 nitems_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
517 nitems_step, iv_factor);
518 if (nitems_skip)
519 nitems_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
520 nitems_skip, compare_factor);
523 /* Create an induction variable that counts the number of items
524 processed. */
525 tree index_before_incr, index_after_incr;
526 gimple_stmt_iterator incr_gsi;
527 bool insert_after;
528 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
529 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
531 /* Create an IV that counts down from niters_total and whose step
532 is the (variable) amount processed in the current iteration:
534 _10 = (unsigned long) count_12(D);
536 # ivtmp_9 = PHI <ivtmp_35(6), _10(5)>
537 _36 = MIN_EXPR <ivtmp_9, POLY_INT_CST [4, 4]>;
539 vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
541 ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
543 if (ivtmp_9 > POLY_INT_CST [4, 4])
544 goto <bb 4>; [83.33%]
545 else
546 goto <bb 5>; [16.67%]
548 nitems_total = gimple_convert (preheader_seq, iv_type, nitems_total);
549 tree step = rgc->controls.length () == 1 ? rgc->controls[0]
550 : make_ssa_name (iv_type);
551 /* Create decrement IV. */
552 create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
553 &incr_gsi, insert_after, &index_before_incr,
554 &index_after_incr);
555 gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
556 index_before_incr,
557 nitems_step));
558 *iv_step = step;
559 *compare_step = nitems_step;
560 return index_before_incr;
563 /* Create increment IV. */
564 create_iv (build_int_cst (iv_type, 0), PLUS_EXPR, nitems_step, NULL_TREE,
565 loop, &incr_gsi, insert_after, &index_before_incr,
566 &index_after_incr);
568 tree zero_index = build_int_cst (compare_type, 0);
569 tree test_index, test_limit, first_limit;
570 gimple_stmt_iterator *test_gsi;
571 if (might_wrap_p)
573 /* In principle the loop should stop iterating once the incremented
574 IV reaches a value greater than or equal to:
576 NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP
578 However, there's no guarantee that this addition doesn't overflow
579 the comparison type, or that the IV hits a value above it before
580 wrapping around. We therefore adjust the limit down by one
581 IV step:
583 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
584 -[infinite-prec] NITEMS_STEP
586 and compare the IV against this limit _before_ incrementing it.
587 Since the comparison type is unsigned, we actually want the
588 subtraction to saturate at zero:
590 (NITEMS_TOTAL +[infinite-prec] NITEMS_SKIP)
591 -[sat] NITEMS_STEP
593 And since NITEMS_SKIP < NITEMS_STEP, we can reassociate this as:
595 NITEMS_TOTAL -[sat] (NITEMS_STEP - NITEMS_SKIP)
597 where the rightmost subtraction can be done directly in
598 COMPARE_TYPE. */
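/* Worked example (numbers chosen arbitrarily): with NITEMS_TOTAL = 100,
   NITEMS_SKIP = 3 and NITEMS_STEP = 16 this computes
   MAX (100, 16 - 3) - (16 - 3) = 87; with NITEMS_TOTAL = 5 the MAX
   makes the subtraction saturate at 0.  */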
599 test_index = index_before_incr;
600 tree adjust = gimple_convert (preheader_seq, compare_type,
601 nitems_step);
602 if (nitems_skip)
603 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
604 adjust, nitems_skip);
605 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
606 nitems_total, adjust);
607 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
608 test_limit, adjust);
609 test_gsi = &incr_gsi;
611 /* Get a safe limit for the first iteration. */
612 if (nitems_skip)
614 /* The first vector iteration can handle at most NITEMS_STEP
615 items. NITEMS_STEP <= CONST_LIMIT, and adding
616 NITEMS_SKIP to that cannot overflow. */
617 tree const_limit = build_int_cst (compare_type,
618 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
619 * nitems_per_iter);
620 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
621 nitems_total, const_limit);
622 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
623 first_limit, nitems_skip);
625 else
626 /* For the first iteration it doesn't matter whether the IV hits
627 a value above NITEMS_TOTAL. That only matters for the latch
628 condition. */
629 first_limit = nitems_total;
631 else
633 /* Test the incremented IV, which will always hit a value above
634 the bound before wrapping. */
635 test_index = index_after_incr;
636 test_limit = nitems_total;
637 if (nitems_skip)
638 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
639 test_limit, nitems_skip);
640 test_gsi = &loop_cond_gsi;
642 first_limit = test_limit;
645 /* Convert the IV value to the comparison type (either a no-op or
646 a demotion). */
647 gimple_seq test_seq = NULL;
648 test_index = gimple_convert (&test_seq, compare_type, test_index);
649 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
651 /* Provide a definition of each control in the group. */
652 tree next_ctrl = NULL_TREE;
653 tree ctrl;
654 unsigned int i;
655 FOR_EACH_VEC_ELT_REVERSE (rgc->controls, i, ctrl)
657 /* Previous controls will cover BIAS items. This control covers the
658 next batch. */
659 poly_uint64 bias = nitems_per_ctrl * i;
660 tree bias_tree = build_int_cst (compare_type, bias);
662 /* See whether the first iteration of the vector loop is known
663 to have a full control. */
664 poly_uint64 const_limit;
665 bool first_iteration_full
666 = (poly_int_tree_p (first_limit, &const_limit)
667 && known_ge (const_limit, (i + 1) * nitems_per_ctrl));
669 /* Rather than have a new IV that starts at BIAS and goes up to
670 TEST_LIMIT, prefer to use the same 0-based IV for each control
671 and adjust the bound down by BIAS. */
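/* For illustration (arbitrary numbers): with NITEMS_PER_CTRL == 4 and
   I == 1, BIAS is 4, so a limit of 10 becomes MAX (10, 4) - 4 = 6 for
   this control.  */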
672 tree this_test_limit = test_limit;
673 if (i != 0)
675 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
676 compare_type, this_test_limit,
677 bias_tree);
678 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
679 compare_type, this_test_limit,
680 bias_tree);
683 /* Create the initial control. First include all items that
684 are within the loop limit. */
685 tree init_ctrl = NULL_TREE;
686 if (!first_iteration_full)
688 tree start, end;
689 if (first_limit == test_limit)
691 /* Use a natural test between zero (the initial IV value)
692 and the loop limit. The "else" block would be valid too,
693 but this choice can avoid the need to load BIAS_TREE into
694 a register. */
695 start = zero_index;
696 end = this_test_limit;
698 else
700 /* FIRST_LIMIT is the maximum number of items handled by the
701 first iteration of the vector loop. Test the portion
702 associated with this control. */
703 start = bias_tree;
704 end = first_limit;
707 if (use_masks_p)
708 init_ctrl = vect_gen_while (preheader_seq, ctrl_type,
709 start, end, "max_mask");
710 else
712 init_ctrl = make_temp_ssa_name (compare_type, NULL, "max_len");
713 gimple_seq seq = vect_gen_len (init_ctrl, start,
714 end, length_limit);
715 gimple_seq_add_seq (preheader_seq, seq);
719 /* Now AND out the bits that are within the number of skipped
720 items. */
721 poly_uint64 const_skip;
722 if (nitems_skip
723 && !(poly_int_tree_p (nitems_skip, &const_skip)
724 && known_le (const_skip, bias)))
726 gcc_assert (use_masks_p);
727 tree unskipped_mask = vect_gen_while_not (preheader_seq, ctrl_type,
728 bias_tree, nitems_skip);
729 if (init_ctrl)
730 init_ctrl = gimple_build (preheader_seq, BIT_AND_EXPR, ctrl_type,
731 init_ctrl, unskipped_mask);
732 else
733 init_ctrl = unskipped_mask;
736 if (!init_ctrl)
738 /* First iteration is full. */
739 if (use_masks_p)
740 init_ctrl = build_minus_one_cst (ctrl_type);
741 else
742 init_ctrl = length_limit;
745 /* Get the control value for the next iteration of the loop. */
746 if (use_masks_p)
748 gimple_seq stmts = NULL;
749 next_ctrl = vect_gen_while (&stmts, ctrl_type, test_index,
750 this_test_limit, "next_mask");
751 gsi_insert_seq_before (test_gsi, stmts, GSI_SAME_STMT);
753 else
755 next_ctrl = make_temp_ssa_name (compare_type, NULL, "next_len");
756 gimple_seq seq = vect_gen_len (next_ctrl, test_index, this_test_limit,
757 length_limit);
758 gsi_insert_seq_before (test_gsi, seq, GSI_SAME_STMT);
761 vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl);
764 int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
765 if (partial_load_bias != 0)
767 tree adjusted_len = rgc->bias_adjusted_ctrl;
768 gassign *minus = gimple_build_assign (adjusted_len, PLUS_EXPR,
769 rgc->controls[0],
770 build_int_cst
771 (TREE_TYPE (rgc->controls[0]),
772 partial_load_bias));
773 gimple_seq_add_stmt (header_seq, minus);
776 return next_ctrl;
779 /* Set up the iteration condition and rgroup controls for LOOP, given
780 that LOOP_VINFO_USING_PARTIAL_VECTORS_P is true for the vectorized
781 loop. LOOP_VINFO describes the vectorization of LOOP. NITERS is
782 the number of iterations of the original scalar loop that should be
783 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are as
784 for vect_set_loop_condition.
786 Insert the branch-back condition before LOOP_COND_GSI and return the
787 final gcond. */
789 static gcond *
790 vect_set_loop_condition_partial_vectors (class loop *loop,
791 loop_vec_info loop_vinfo, tree niters,
792 tree final_iv, bool niters_maybe_zero,
793 gimple_stmt_iterator loop_cond_gsi)
795 gimple_seq preheader_seq = NULL;
796 gimple_seq header_seq = NULL;
798 bool use_masks_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
799 tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
800 unsigned int compare_precision = TYPE_PRECISION (compare_type);
801 tree orig_niters = niters;
803 /* Type of the initial value of NITERS. */
804 tree ni_actual_type = TREE_TYPE (niters);
805 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
806 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
808 /* Convert NITERS to the same size as the compare. */
809 if (compare_precision > ni_actual_precision
810 && niters_maybe_zero)
812 /* We know that there is always at least one iteration, so if the
813 count is zero then it must have wrapped. Cope with this by
814 subtracting 1 before the conversion and adding 1 to the result. */
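/* Illustrative example (using an 8-bit NITERS type for brevity): an
   original count of 256 is represented as 0; adding the all-ones
   constant gives 255, the widening conversion keeps 255, and adding 1
   back in the compare type yields the intended 256.  */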
815 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
816 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
817 niters, build_minus_one_cst (ni_actual_type));
818 niters = gimple_convert (&preheader_seq, compare_type, niters);
819 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
820 niters, build_one_cst (compare_type));
822 else
823 niters = gimple_convert (&preheader_seq, compare_type, niters);
825 /* Iterate over all the rgroups and fill in their controls. We could use
826 the first control from any rgroup for the loop condition; here we
827 arbitrarily pick the last. */
828 tree test_ctrl = NULL_TREE;
829 tree iv_step = NULL_TREE;
830 tree compare_step = NULL_TREE;
831 rgroup_controls *rgc;
832 rgroup_controls *iv_rgc = nullptr;
833 unsigned int i;
834 auto_vec<rgroup_controls> *controls = use_masks_p
835 ? &LOOP_VINFO_MASKS (loop_vinfo)
836 : &LOOP_VINFO_LENS (loop_vinfo);
837 FOR_EACH_VEC_ELT (*controls, i, rgc)
838 if (!rgc->controls.is_empty ())
840 /* First try using permutes. This adds a single vector
841 instruction to the loop for each mask, but needs no extra
842 loop invariants or IVs. */
843 unsigned int nmasks = i + 1;
844 if (use_masks_p && (nmasks & 1) == 0)
846 rgroup_controls *half_rgc = &(*controls)[nmasks / 2 - 1];
847 if (!half_rgc->controls.is_empty ()
848 && vect_maybe_permute_loop_masks (&header_seq, rgc, half_rgc))
849 continue;
852 if (!LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
853 || !iv_rgc
854 || (iv_rgc->max_nscalars_per_iter * iv_rgc->factor
855 != rgc->max_nscalars_per_iter * rgc->factor))
857 /* See whether zero-based IV would ever generate all-false masks
858 or zero length before wrapping around. */
859 bool might_wrap_p = vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc);
861 /* Set up all controls for this group. */
862 test_ctrl
863 = vect_set_loop_controls_directly (loop, loop_vinfo,
864 &preheader_seq, &header_seq,
865 loop_cond_gsi, rgc, niters,
866 niters_skip, might_wrap_p,
867 &iv_step, &compare_step);
869 iv_rgc = rgc;
872 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
873 && rgc->controls.length () > 1)
875 /* vect_set_loop_controls_directly creates an IV whose step
876 is equal to the expected sum of RGC->controls. Use that
877 information to populate RGC->controls. */
878 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
879 gcc_assert (iv_step);
880 vect_adjust_loop_lens_control (iv_type, &header_seq, rgc, iv_step);
884 /* Emit all accumulated statements. */
885 add_preheader_seq (loop, preheader_seq);
886 add_header_seq (loop, header_seq);
888 /* Get a boolean result that tells us whether to iterate. */
889 edge exit_edge = single_exit (loop);
890 gcond *cond_stmt;
891 if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
893 gcc_assert (compare_step);
894 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
895 cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
896 NULL_TREE);
898 else
900 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
901 tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
902 cond_stmt
903 = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
905 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
907 /* The loop iterates (NITERS - 1) / VF + 1 times.
908 Subtract one from this to get the latch count. */
909 tree step = build_int_cst (compare_type,
910 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
911 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
912 build_minus_one_cst (compare_type));
913 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
914 niters_minus_one, step);
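/* Worked example (arbitrary values): NITERS = 10 and VF = 4 give
   (10 - 1) / 4 = 2 latch iterations, i.e. three iterations of the
   vector loop.  */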
916 if (final_iv)
918 gassign *assign = gimple_build_assign (final_iv, orig_niters);
919 gsi_insert_on_edge_immediate (single_exit (loop), assign);
922 return cond_stmt;
925 /* Like vect_set_loop_condition, but handle the case in which the vector
926 loop handles exactly VF scalars per iteration. */
928 static gcond *
929 vect_set_loop_condition_normal (class loop *loop, tree niters, tree step,
930 tree final_iv, bool niters_maybe_zero,
931 gimple_stmt_iterator loop_cond_gsi)
933 tree indx_before_incr, indx_after_incr;
934 gcond *cond_stmt;
935 gcond *orig_cond;
936 edge pe = loop_preheader_edge (loop);
937 edge exit_edge = single_exit (loop);
938 gimple_stmt_iterator incr_gsi;
939 bool insert_after;
940 enum tree_code code;
941 tree niters_type = TREE_TYPE (niters);
943 orig_cond = get_loop_exit_condition (loop);
944 gcc_assert (orig_cond);
945 loop_cond_gsi = gsi_for_stmt (orig_cond);
947 tree init, limit;
948 if (!niters_maybe_zero && integer_onep (step))
950 /* In this case we can use a simple 0-based IV:
953 x = 0;
957 x += 1;
959 while (x < NITERS); */
960 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
961 init = build_zero_cst (niters_type);
962 limit = niters;
964 else
966 /* The following works for all values of NITERS except 0:
969 x = 0;
973 x += STEP;
975 while (x <= NITERS - STEP);
977 so that the loop continues to iterate if x + STEP - 1 < NITERS
978 but stops if x + STEP - 1 >= NITERS.
980 However, if NITERS is zero, x never hits a value above NITERS - STEP
981 before wrapping around. There are two obvious ways of dealing with
982 this:
984 - start at STEP - 1 and compare x before incrementing it
985 - start at -1 and compare x after incrementing it
987 The latter is simpler and is what we use. The loop in this case
988 looks like:
991 x = -1;
995 x += STEP;
997 while (x < NITERS - STEP);
999 In both cases the loop limit is NITERS - STEP. */
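/* Illustrative walk-through of case B (arbitrary numbers): with
   NITERS = 12 and STEP = 4 the limit is 8; x takes the values 4, 8, 12
   and the test x <= 8 first fails at 12, so the loop runs three
   times.  */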
1000 gimple_seq seq = NULL;
1001 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
1002 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
1003 if (seq)
1005 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1006 gcc_assert (!new_bb);
1008 if (niters_maybe_zero)
1010 /* Case C. */
1011 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
1012 init = build_all_ones_cst (niters_type);
1014 else
1016 /* Case B. */
1017 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
1018 init = build_zero_cst (niters_type);
1022 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1023 create_iv (init, PLUS_EXPR, step, NULL_TREE, loop,
1024 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
1025 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
1026 true, NULL_TREE, true,
1027 GSI_SAME_STMT);
1028 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
1029 true, GSI_SAME_STMT);
1031 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
1032 NULL_TREE);
1034 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
1036 /* Record the number of latch iterations. */
1037 if (limit == niters)
1038 /* Case A: the loop iterates NITERS times. Subtract one to get the
1039 latch count. */
1040 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
1041 build_int_cst (niters_type, 1));
1042 else
1043 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
1044 Subtract one from this to get the latch count. */
1045 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
1046 limit, step);
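/* For case B/C, as an illustration (arbitrary numbers): NITERS = 20 and
   STEP = 4 give LIMIT = 16 and a latch count of 16 / 4 = 4, i.e. five
   iterations in total.  */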
1048 if (final_iv)
1050 gassign *assign;
1051 edge exit = single_exit (loop);
1052 gcc_assert (single_pred_p (exit->dest));
1053 tree phi_dest
1054 = integer_zerop (init) ? final_iv : copy_ssa_name (indx_after_incr);
1055 /* Make sure to maintain LC SSA form here and elide the subtraction
1056 if the value is zero. */
1057 gphi *phi = create_phi_node (phi_dest, exit->dest);
1058 add_phi_arg (phi, indx_after_incr, exit, UNKNOWN_LOCATION);
1059 if (!integer_zerop (init))
1061 assign = gimple_build_assign (final_iv, MINUS_EXPR,
1062 phi_dest, init);
1063 gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
1064 gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
1068 return cond_stmt;
1071 /* If we're using fully-masked loops, make LOOP iterate:
1073 N == (NITERS - 1) / STEP + 1
1075 times. When NITERS is zero, this is equivalent to making the loop
1076 execute (1 << M) / STEP times, where M is the precision of NITERS.
1077 NITERS_MAYBE_ZERO is true if this last case might occur.
1079 If we're not using fully-masked loops, make LOOP iterate:
1081 N == (NITERS - STEP) / STEP + 1
1083 times, where NITERS is known to be outside the range [1, STEP - 1].
1084 This is equivalent to making the loop execute NITERS / STEP times
1085 when NITERS is nonzero and (1 << M) / STEP times otherwise.
1086 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
1088 If FINAL_IV is nonnull, it is an SSA name that should be set to
1089 N * STEP on exit from the loop.
1091 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
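/* Hypothetical examples of the two formulas: a fully-masked loop with
   NITERS = 7 and STEP = 4 runs (7 - 1) / 4 + 1 = 2 times; a normal loop
   with NITERS = 8 and STEP = 4 runs (8 - 4) / 4 + 1 = 2 times.  */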
1093 void
1094 vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
1095 tree niters, tree step, tree final_iv,
1096 bool niters_maybe_zero)
1098 gcond *cond_stmt;
1099 gcond *orig_cond = get_loop_exit_condition (loop);
1100 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
1102 if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
1103 cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo,
1104 niters, final_iv,
1105 niters_maybe_zero,
1106 loop_cond_gsi);
1107 else
1108 cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv,
1109 niters_maybe_zero,
1110 loop_cond_gsi);
1112 /* Remove old loop exit test. */
1113 stmt_vec_info orig_cond_info;
1114 if (loop_vinfo
1115 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
1116 loop_vinfo->remove_stmt (orig_cond_info);
1117 else
1118 gsi_remove (&loop_cond_gsi, true);
1120 if (dump_enabled_p ())
1121 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
1122 (gimple *) cond_stmt);
1125 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
1126 For all PHI arguments in FROM->dest and TO->dest from those
1127 edges, ensure that the TO->dest PHI arguments have their current_def
1128 set to that of the corresponding FROM->dest argument. */
1130 static void
1131 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
1133 gimple_stmt_iterator gsi_from, gsi_to;
1135 for (gsi_from = gsi_start_phis (from->dest),
1136 gsi_to = gsi_start_phis (to->dest);
1137 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
1139 gimple *from_phi = gsi_stmt (gsi_from);
1140 gimple *to_phi = gsi_stmt (gsi_to);
1141 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
1142 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
1143 if (virtual_operand_p (from_arg))
1145 gsi_next (&gsi_from);
1146 continue;
1148 if (virtual_operand_p (to_arg))
1150 gsi_next (&gsi_to);
1151 continue;
1153 if (TREE_CODE (from_arg) != SSA_NAME)
1154 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
1155 else if (TREE_CODE (to_arg) == SSA_NAME
1156 && from_arg != to_arg)
1158 if (get_current_def (to_arg) == NULL_TREE)
1160 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
1161 TREE_TYPE (get_current_def
1162 (from_arg))));
1163 set_current_def (to_arg, get_current_def (from_arg));
1166 gsi_next (&gsi_from);
1167 gsi_next (&gsi_to);
1170 gphi *from_phi = get_virtual_phi (from->dest);
1171 gphi *to_phi = get_virtual_phi (to->dest);
1172 if (from_phi)
1173 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
1174 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
1178 /* Given LOOP this function generates a new copy of it and puts it
1179 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1180 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1181 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1182 entry or exit of LOOP. */
1184 class loop *
1185 slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop,
1186 class loop *scalar_loop, edge e)
1188 class loop *new_loop;
1189 basic_block *new_bbs, *bbs, *pbbs;
1190 bool at_exit;
1191 bool was_imm_dom;
1192 basic_block exit_dest;
1193 edge exit, new_exit;
1194 bool duplicate_outer_loop = false;
1196 exit = single_exit (loop);
1197 at_exit = (e == exit);
1198 if (!at_exit && e != loop_preheader_edge (loop))
1199 return NULL;
1201 if (scalar_loop == NULL)
1202 scalar_loop = loop;
1204 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1205 pbbs = bbs + 1;
1206 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
1207 /* Allow duplication of outer loops. */
1208 if (scalar_loop->inner)
1209 duplicate_outer_loop = true;
1211 /* Generate new loop structure. */
1212 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1213 duplicate_subloops (scalar_loop, new_loop);
1215 exit_dest = exit->dest;
1216 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1217 exit_dest) == loop->header ?
1218 true : false);
1220 /* Also copy the pre-header, this avoids jumping through hoops to
1221 duplicate the loop entry PHI arguments. Create an empty
1222 pre-header unconditionally for this. */
1223 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
1224 edge entry_e = single_pred_edge (preheader);
1225 bbs[0] = preheader;
1226 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
1228 exit = single_exit (scalar_loop);
1229 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
1230 &exit, 1, &new_exit, NULL,
1231 at_exit ? loop->latch : e->src, true);
1232 exit = single_exit (loop);
1233 basic_block new_preheader = new_bbs[0];
1235 /* Before installing PHI arguments make sure that the edges
1236 into them match that of the scalar loop we analyzed. This
1237 makes sure the SLP tree matches up between the main vectorized
1238 loop and the epilogue vectorized copies. */
1239 if (single_succ_edge (preheader)->dest_idx
1240 != single_succ_edge (new_bbs[0])->dest_idx)
1242 basic_block swap_bb = new_bbs[1];
1243 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1244 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1245 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1246 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1248 if (duplicate_outer_loop)
1250 class loop *new_inner_loop = get_loop_copy (scalar_loop->inner);
1251 if (loop_preheader_edge (scalar_loop)->dest_idx
1252 != loop_preheader_edge (new_inner_loop)->dest_idx)
1254 basic_block swap_bb = new_inner_loop->header;
1255 gcc_assert (EDGE_COUNT (swap_bb->preds) == 2);
1256 std::swap (EDGE_PRED (swap_bb, 0), EDGE_PRED (swap_bb, 1));
1257 EDGE_PRED (swap_bb, 0)->dest_idx = 0;
1258 EDGE_PRED (swap_bb, 1)->dest_idx = 1;
1262 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
1264 /* Skip new preheader since it's deleted if copy loop is added at entry. */
1265 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
1266 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
1268 if (scalar_loop != loop)
1270 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1271 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1272 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1273 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1274 header) to have current_def set, so copy them over. */
1275 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
1276 exit);
1277 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1279 EDGE_SUCC (loop->latch, 0));
1282 if (at_exit) /* Add the loop copy at exit. */
1284 if (scalar_loop != loop)
1286 gphi_iterator gsi;
1287 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1289 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
1290 gsi_next (&gsi))
1292 gphi *phi = gsi.phi ();
1293 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
1294 location_t orig_locus
1295 = gimple_phi_arg_location_from_edge (phi, e);
1297 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
1300 redirect_edge_and_branch_force (e, new_preheader);
1301 flush_pending_stmts (e);
1302 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
1303 if (was_imm_dom || duplicate_outer_loop)
1304 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
1306 /* And remove the unnecessary forwarder again. Keep the other
1307 one so we have a proper pre-header for the loop at the exit edge. */
1308 redirect_edge_pred (single_succ_edge (preheader),
1309 single_pred (preheader));
1310 delete_basic_block (preheader);
1311 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1312 loop_preheader_edge (scalar_loop)->src);
1314 else /* Add the copy at entry. */
1316 if (scalar_loop != loop)
1318 /* Remove the unnecessary forwarder of scalar_loop again. */
1319 redirect_edge_pred (single_succ_edge (preheader),
1320 single_pred (preheader));
1321 delete_basic_block (preheader);
1322 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1323 loop_preheader_edge (scalar_loop)->src);
1324 preheader = split_edge (loop_preheader_edge (loop));
1325 entry_e = single_pred_edge (preheader);
1328 redirect_edge_and_branch_force (entry_e, new_preheader);
1329 flush_pending_stmts (entry_e);
1330 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1332 redirect_edge_and_branch_force (new_exit, preheader);
1333 flush_pending_stmts (new_exit);
1334 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1336 /* And remove the unnecessary forwarder again. Keep the other
1337 one so we have a proper pre-header for the loop at the exit edge. */
1338 redirect_edge_pred (single_succ_edge (new_preheader),
1339 single_pred (new_preheader));
1340 delete_basic_block (new_preheader);
1341 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1342 loop_preheader_edge (new_loop)->src);
1345 if (scalar_loop != loop)
1347 /* Update new_loop->header PHIs, so that on the preheader
1348 edge they are the ones from loop rather than scalar_loop. */
1349 gphi_iterator gsi_orig, gsi_new;
1350 edge orig_e = loop_preheader_edge (loop);
1351 edge new_e = loop_preheader_edge (new_loop);
1353 for (gsi_orig = gsi_start_phis (loop->header),
1354 gsi_new = gsi_start_phis (new_loop->header);
1355 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
1356 gsi_next (&gsi_orig), gsi_next (&gsi_new))
1358 gphi *orig_phi = gsi_orig.phi ();
1359 gphi *new_phi = gsi_new.phi ();
1360 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1361 location_t orig_locus
1362 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1364 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
1368 free (new_bbs);
1369 free (bbs);
1371 checking_verify_dominators (CDI_DOMINATORS);
1373 return new_loop;
1377 /* Given the condition expression COND, put it as the last statement of
1378 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1379 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
1380 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1381 new edge as irreducible if IRREDUCIBLE_P is true. */
1383 static edge
1384 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
1385 basic_block guard_to, basic_block dom_bb,
1386 profile_probability probability, bool irreducible_p)
1388 gimple_stmt_iterator gsi;
1389 edge new_e, enter_e;
1390 gcond *cond_stmt;
1391 gimple_seq gimplify_stmt_list = NULL;
1393 enter_e = EDGE_SUCC (guard_bb, 0);
1394 enter_e->flags &= ~EDGE_FALLTHRU;
1395 enter_e->flags |= EDGE_FALSE_VALUE;
1396 gsi = gsi_last_bb (guard_bb);
1398 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list,
1399 is_gimple_condexpr_for_cond, NULL_TREE);
1400 if (gimplify_stmt_list)
1401 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1403 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
1404 gsi = gsi_last_bb (guard_bb);
1405 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1407 /* Add new edge to connect guard block to the merge/loop-exit block. */
1408 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
1410 new_e->probability = probability;
1411 if (irreducible_p)
1412 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1414 enter_e->probability = probability.invert ();
1415 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
1417 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1418 if (enter_e->dest->loop_father->header == enter_e->dest)
1419 split_edge (enter_e);
1421 return new_e;
1425 /* This function verifies that the following restrictions apply to LOOP:
1426 (1) it consists of exactly 2 basic blocks - a header and an empty latch -
1427 for an innermost loop, and of 5 basic blocks for an outer loop.
1428 (2) it is single entry, single exit
1429 (3) its exit condition is the last stmt in the header
1430 (4) E is the entry/exit edge of LOOP.
1433 bool
1434 slpeel_can_duplicate_loop_p (const class loop *loop, const_edge e)
1436 edge exit_e = single_exit (loop);
1437 edge entry_e = loop_preheader_edge (loop);
1438 gcond *orig_cond = get_loop_exit_condition (loop);
1439 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1440 unsigned int num_bb = loop->inner? 5 : 2;
1442 /* All loops have an outer scope; the only case loop->outer is NULL is for
1443 the function itself. */
1444 if (!loop_outer (loop)
1445 || loop->num_nodes != num_bb
1446 || !empty_block_p (loop->latch)
1447 || !single_exit (loop)
1448 /* Verify that new loop exit condition can be trivially modified. */
1449 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1450 || (e != exit_e && e != entry_e))
1451 return false;
1453 basic_block *bbs = XNEWVEC (basic_block, loop->num_nodes);
1454 get_loop_body_with_size (loop, bbs, loop->num_nodes);
1455 bool ret = can_copy_bbs_p (bbs, loop->num_nodes);
1456 free (bbs);
1457 return ret;
1460 /* Function vect_get_loop_location.
1462 Extract the location of the loop in the source code.
1463 If the loop is not well formed for vectorization, an estimated
1464 location is calculated.
1465 Return the loop location on success and NULL if not. */
1467 dump_user_location_t
1468 find_loop_location (class loop *loop)
1470 gimple *stmt = NULL;
1471 basic_block bb;
1472 gimple_stmt_iterator si;
1474 if (!loop)
1475 return dump_user_location_t ();
1477 stmt = get_loop_exit_condition (loop);
1479 if (stmt
1480 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1481 return stmt;
1483 /* If we got here the loop is probably not "well formed";
1484 try to estimate the loop location. */
1486 if (!loop->header)
1487 return dump_user_location_t ();
1489 bb = loop->header;
1491 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1493 stmt = gsi_stmt (si);
1494 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1495 return stmt;
1498 return dump_user_location_t ();
1501 /* Return true if the phi described by STMT_INFO defines an IV of the
1502 loop to be vectorized. */
1504 static bool
1505 iv_phi_p (stmt_vec_info stmt_info)
1507 gphi *phi = as_a <gphi *> (stmt_info->stmt);
1508 if (virtual_operand_p (PHI_RESULT (phi)))
1509 return false;
1511 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1512 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1513 return false;
1515 return true;
1518 /* Return true if vectorizer can peel for nonlinear iv. */
1519 static bool
1520 vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
1521 enum vect_induction_op_type induction_type)
1523 tree niters_skip;
1524 /* Init_expr will be updated by vect_update_ivs_after_vectorizer.
1525 If niters or vf is unknown:
1526 For shift, when the shift amount >= precision, the behavior is undefined.
1527 For mult, we don't know how to generate
1528 init_expr * pow (step, niters) for variable niters.
1529 For neg, it should be ok, since niters of the vectorized main loop
1530 will always be a multiple of 2. */
1531 if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1532 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
1533 && induction_type != vect_step_op_neg)
1535 if (dump_enabled_p ())
1536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1537 "Peeling for epilogue is not supported"
1538 " for nonlinear induction except neg"
1539 " when iteration count is unknown.\n");
1540 return false;
1543 /* Also doesn't support peeling for neg when niter is variable.
1544 ??? Generate something like niter_expr & 1 ? init_expr : -init_expr? */
1545 niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
1546 if ((niters_skip != NULL_TREE
1547 && TREE_CODE (niters_skip) != INTEGER_CST)
1548 || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
1549 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
1551 if (dump_enabled_p ())
1552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1553 "Peeling for alignement is not supported"
1554 " for nonlinear induction when niters_skip"
1555 " is not constant.\n");
1556 return false;
1559 return true;
1562 /* Function vect_can_advance_ivs_p
1564 In case the number of iterations that LOOP iterates is unknown at compile
1565 time, an epilog loop will be generated, and the loop induction variables
1566 (IVs) will be "advanced" to the value they are supposed to take just before
1567 the epilog loop. Here we check that the access function of the loop IVs
1568 and the expression that represents the loop bound are simple enough.
1569 These restrictions will be relaxed in the future. */
1571 bool
1572 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1574 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1575 basic_block bb = loop->header;
1576 gphi_iterator gsi;
1578 /* Analyze phi functions of the loop header. */
1580 if (dump_enabled_p ())
1581 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1582 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1584 tree evolution_part;
1585 enum vect_induction_op_type induction_type;
1587 gphi *phi = gsi.phi ();
1588 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1591 phi_info->stmt);
1593 /* Skip virtual phi's. The data dependences that are associated with
1594 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
1596 Skip reduction phis. */
1597 if (!iv_phi_p (phi_info))
1599 if (dump_enabled_p ())
1600 dump_printf_loc (MSG_NOTE, vect_location,
1601 "reduc or virtual phi. skip.\n");
1602 continue;
1605 induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
1606 if (induction_type != vect_step_op_add)
1608 if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, induction_type))
1609 return false;
1611 continue;
1614 /* Analyze the evolution function. */
1616 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
1617 if (evolution_part == NULL_TREE)
1619 if (dump_enabled_p ())
1620 dump_printf (MSG_MISSED_OPTIMIZATION,
1621 "No access function or evolution.\n");
1622 return false;
1625 /* FORNOW: We do not transform initial conditions of IVs
1626 which evolution functions are not invariants in the loop. */
1628 if (!expr_invariant_in_loop_p (loop, evolution_part))
1630 if (dump_enabled_p ())
1631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1632 "evolution not invariant in loop.\n");
1633 return false;
1636 /* FORNOW: We do not transform initial conditions of IVs
1637 which evolution functions are a polynomial of degree >= 2. */
1639 if (tree_is_chrec (evolution_part))
1641 if (dump_enabled_p ())
1642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1643 "evolution is chrec.\n");
1644 return false;
1648 return true;
1652 /* Function vect_update_ivs_after_vectorizer.
1654 "Advance" the induction variables of LOOP to the value they should take
1655 after the execution of LOOP. This is currently necessary because the
1656 vectorizer does not handle induction variables that are used after the
1657 loop. Such a situation occurs when the last iterations of LOOP are
1658 peeled, because:
1659 1. We introduced new uses after LOOP for IVs that were not originally used
1660 after LOOP: the IVs of LOOP are now used by an epilog loop.
1661 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1662 times, whereas the loop IVs should be bumped N times.
1664 Input:
1665 - LOOP - a loop that is going to be vectorized. The last few iterations
1666 of LOOP were peeled.
1667 - NITERS - the number of iterations that LOOP executes (before it is
1668 vectorized), i.e., the number of times the ivs should be bumped.
1669 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1670 coming out from LOOP on which there are uses of the LOOP ivs
1671 (this is the path from LOOP->exit to epilog_loop->preheader).
1673 The new definitions of the ivs are placed in LOOP->exit.
1674 The phi args associated with the edge UPDATE_E in the bb
1675 UPDATE_E->dest are updated accordingly.
1677 Assumption 1: Like the rest of the vectorizer, this function assumes
1678 a single loop exit that has a single predecessor.
1680 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1681 organized in the same order.
1683 Assumption 3: The access function of the ivs is simple enough (see
1684 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1686 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1687 coming out of LOOP on which the ivs of LOOP are used (this is the path
1688 that leads to the epilog loop; other paths skip the epilog loop). This
1689 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1690 needs to have its phis updated.
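/* Illustrative example (arbitrary values): for a simple additive IV with
   INIT = 0, STEP = 2 and NITERS = 100, the value the epilog loop should
   start from is 0 + 100 * 2 = 200, which is what the vect_step_op_add
   case below computes as NI.  */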
1693 static void
1694 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
1695 tree niters, edge update_e)
1697 gphi_iterator gsi, gsi1;
1698 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1699 basic_block update_bb = update_e->dest;
1700 basic_block exit_bb = single_exit (loop)->dest;
1702 /* Make sure there exists a single-predecessor exit bb: */
1703 gcc_assert (single_pred_p (exit_bb));
1704 gcc_assert (single_succ_edge (exit_bb) == update_e);
1706 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1707 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1708 gsi_next (&gsi), gsi_next (&gsi1))
1710 tree init_expr;
1711 tree step_expr, off;
1712 tree type;
1713 tree var, ni, ni_name;
1714 gimple_stmt_iterator last_gsi;
1716 gphi *phi = gsi.phi ();
1717 gphi *phi1 = gsi1.phi ();
1718 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
1719 if (dump_enabled_p ())
1720 dump_printf_loc (MSG_NOTE, vect_location,
1721 "vect_update_ivs_after_vectorizer: phi: %G",
1722 (gimple *) phi);
1724 /* Skip reduction and virtual phis. */
1725 if (!iv_phi_p (phi_info))
1727 if (dump_enabled_p ())
1728 dump_printf_loc (MSG_NOTE, vect_location,
1729 "reduc or virtual phi. skip.\n");
1730 continue;
1733 type = TREE_TYPE (gimple_phi_result (phi));
1734 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
1735 step_expr = unshare_expr (step_expr);
1737 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1738 of degree >= 2 or exponential. */
1739 gcc_assert (!tree_is_chrec (step_expr));
1741 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1742 gimple_seq stmts = NULL;
1743 enum vect_induction_op_type induction_type
1744 = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
1746 if (induction_type == vect_step_op_add)
1748 tree stype = TREE_TYPE (step_expr);
1749 off = fold_build2 (MULT_EXPR, stype,
1750 fold_convert (stype, niters), step_expr);
1751 if (POINTER_TYPE_P (type))
1752 ni = fold_build_pointer_plus (init_expr, off);
1753 else
1754 ni = fold_convert (type,
1755 fold_build2 (PLUS_EXPR, stype,
1756 fold_convert (stype, init_expr),
1757 off));
1759 /* Don't bother calling vect_peel_nonlinear_iv_init. */
1760 else if (induction_type == vect_step_op_neg)
1761 ni = init_expr;
1762 else
1763 ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
1764 niters, step_expr,
1765 induction_type);
1767 var = create_tmp_var (type, "tmp");
1769 last_gsi = gsi_last_bb (exit_bb);
1770 gimple_seq new_stmts = NULL;
1771 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
1772 /* Exit_bb shouldn't be empty. */
1773 if (!gsi_end_p (last_gsi))
1775 gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
1776 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
1778 else
1780 gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
1781 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
1784 /* Fix phi expressions in the successor bb. */
1785 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1789 /* Return a gimple value containing the misalignment (measured in vector
1790 elements) for the loop described by LOOP_VINFO, i.e. how many elements
1791 it is away from a perfectly aligned address. Add any new statements
1792 to SEQ. */
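/* For example (illustrative numbers only): with a 16-byte target alignment,
   4-byte elements and a start address ending in 0x8, the result is
   (0x8 & 0xf) >> log2 (4) = 2 elements.  */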
1794 static tree
1795 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
1797 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1798 stmt_vec_info stmt_info = dr_info->stmt;
1799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1801 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1802 unsigned HOST_WIDE_INT target_align_c;
1803 tree target_align_minus_1;
1805 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1806 size_zero_node) < 0;
1807 tree offset = (negative
1808 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1809 * TREE_INT_CST_LOW
1810 (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
1811 : size_zero_node);
1812 tree start_addr = vect_create_addr_base_for_vector_ref (loop_vinfo,
1813 stmt_info, seq,
1814 offset);
1815 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1816 if (target_align.is_constant (&target_align_c))
1817 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
1818 else
1820 tree vla = build_int_cst (type, target_align);
1821 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
1822 fold_build2 (MINUS_EXPR, type,
1823 build_int_cst (type, 0), vla));
1824 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
1825 build_int_cst (type, 1));
1828 HOST_WIDE_INT elem_size
1829 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1830 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1832 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1833 tree int_start_addr = fold_convert (type, start_addr);
1834 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
1835 target_align_minus_1);
1837 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1838 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
1839 elem_size_log);
1841 return misalign_in_elems;
1844 /* Function vect_gen_prolog_loop_niters
1846 Generate the number of iterations which should be peeled as prolog for the
1847 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1848 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1849 As a result, after the execution of this loop, the data reference DR will
1850 refer to an aligned location. The following computation is generated:
1852 If the misalignment of DR is known at compile time:
1853 addr_mis = int mis = DR_MISALIGNMENT (dr);
1854 Else, compute address misalignment in bytes:
1855 addr_mis = addr & (target_align - 1)
1857 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
1859 (elem_size = element type size; an element is the scalar element whose type
1860 is the inner type of the vectype)
1862 The computations will be emitted at the end of BB. We also compute and
1863 store the upper bound (inclusive) of the result in BOUND.
1865 When the step of the data-ref in the loop is not 1 (as in interleaved data
1866 and SLP), the number of iterations of the prolog must be divided by the step
1867 (which is equal to the size of the interleaved group).
1869 The above formulas assume that VF == number of elements in the vector. This
1870 may not hold when there are multiple types in the loop.
1871 In this case, for some data-references in the loop the VF does not represent
1872 the number of elements that fit in the vector. Therefore, instead of VF we
1873 use TYPE_VECTOR_SUBPARTS. */
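/* As a worked illustration (numbers invented): for V16QI accesses with a
   16-byte target alignment, elem_size = 1 and step = 1, a data reference
   with addr_mis = 3 gives

     prolog_niters = ((16 - 3/1) & (16 - 1)) / 1 = 13

   i.e. peeling 13 scalar iterations brings DR to a 16-byte boundary.  */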
1875 static tree
1876 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1877 basic_block bb, int *bound)
1879 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1880 tree var;
1881 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1882 gimple_seq stmts = NULL, new_stmts = NULL;
1883 tree iters, iters_name;
1884 stmt_vec_info stmt_info = dr_info->stmt;
1885 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1886 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1888 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1890 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1892 if (dump_enabled_p ())
1893 dump_printf_loc (MSG_NOTE, vect_location,
1894 "known peeling = %d.\n", npeel);
1896 iters = build_int_cst (niters_type, npeel);
1897 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1899 else
1901 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
1902 tree type = TREE_TYPE (misalign_in_elems);
1903 HOST_WIDE_INT elem_size
1904 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1905 /* We only do prolog peeling if the target alignment is known at compile
1906 time. */
1907 poly_uint64 align_in_elems =
1908 exact_div (target_align, elem_size);
1909 tree align_in_elems_minus_1 =
1910 build_int_cst (type, align_in_elems - 1);
1911 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
1913 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1914 & (align_in_elems - 1)). */
1915 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1916 size_zero_node) < 0;
1917 if (negative)
1918 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1919 align_in_elems_tree);
1920 else
1921 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1922 misalign_in_elems);
1923 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
1924 iters = fold_convert (niters_type, iters);
1925 unsigned HOST_WIDE_INT align_in_elems_c;
1926 if (align_in_elems.is_constant (&align_in_elems_c))
1927 *bound = align_in_elems_c - 1;
1928 else
1929 *bound = -1;
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_NOTE, vect_location,
1934 "niters for prolog loop: %T\n", iters);
1936 var = create_tmp_var (niters_type, "prolog_loop_niters");
1937 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1939 if (new_stmts)
1940 gimple_seq_add_seq (&stmts, new_stmts);
1941 if (stmts)
1943 gcc_assert (single_succ_p (bb));
1944 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1945 if (gsi_end_p (gsi))
1946 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1947 else
1948 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1950 return iters_name;
1954 /* Function vect_update_init_of_dr
1956 If CODE is PLUS, the vector loop starts NITERS iterations after the
1957 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
1958 iterations before the scalar one (using masking to skip inactive
1959 elements). This function updates the information recorded in DR to
1960 account for the difference. Specifically, it updates the OFFSET
1961 field of DR_INFO. */
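/* For instance (illustrative values): a DR with DR_STEP 8 and a current
   OFFSET of 0, updated with CODE == PLUS_EXPR and NITERS = 3, ends up with
   OFFSET = 0 + 3 * 8 = 24 bytes.  */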
1963 static void
1964 vect_update_init_of_dr (dr_vec_info *dr_info, tree niters, tree_code code)
1966 struct data_reference *dr = dr_info->dr;
1967 tree offset = dr_info->offset;
1968 if (!offset)
1969 offset = build_zero_cst (sizetype);
1971 niters = fold_build2 (MULT_EXPR, sizetype,
1972 fold_convert (sizetype, niters),
1973 fold_convert (sizetype, DR_STEP (dr)));
1974 offset = fold_build2 (code, sizetype,
1975 fold_convert (sizetype, offset), niters);
1976 dr_info->offset = offset;
1980 /* Function vect_update_inits_of_drs
1982 Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
1983 CODE and NITERS are as for vect_update_inits_of_dr. */
1985 void
1986 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
1987 tree_code code)
1989 unsigned int i;
1990 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1991 struct data_reference *dr;
1993 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
1995 /* Adjust niters to sizetype. We used to insert the stmts on the loop
1996 preheader here, but since we might use these niters to update the epilogue's
1997 niters and data references we can't insert them here, as this definition
1998 might not always dominate its uses. */
1999 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
2000 niters = fold_convert (sizetype, niters);
2002 FOR_EACH_VEC_ELT (datarefs, i, dr)
2004 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2005 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt)
2006 && !STMT_VINFO_SIMD_LANE_ACCESS_P (dr_info->stmt))
2007 vect_update_init_of_dr (dr_info, niters, code);
2011 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
2012 by masking. This involves calculating the number of iterations to
2013 be peeled and then aligning all memory references appropriately. */
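/* For instance (illustrative): with VF = 8 and a compile-time peeling amount
   of 3 iterations, the code below records MASK_SKIP_NITERS = 8 - 3 = 5 and
   moves each DR's start 5 iterations backwards, so the first 5 lanes of the
   first vector iteration are inactive.  */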
2015 void
2016 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
2018 tree misalign_in_elems;
2019 tree type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
2021 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
2023 /* From the information recorded in LOOP_VINFO get the number of iterations
2024 that need to be skipped via masking. */
2025 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2027 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2028 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
2029 misalign_in_elems = build_int_cst (type, misalign);
2031 else
2033 gimple_seq seq1 = NULL, seq2 = NULL;
2034 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
2035 misalign_in_elems = fold_convert (type, misalign_in_elems);
2036 misalign_in_elems = force_gimple_operand (misalign_in_elems,
2037 &seq2, true, NULL_TREE);
2038 gimple_seq_add_seq (&seq1, seq2);
2039 if (seq1)
2041 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2042 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
2043 gcc_assert (!new_bb);
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_NOTE, vect_location,
2049 "misalignment for fully-masked loop: %T\n",
2050 misalign_in_elems);
2052 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
2054 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
2057 /* This function builds ni_name = number of iterations. Statements
2058 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
2059 it to TRUE if a new ssa_var is generated. */
2061 tree
2062 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
2064 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2065 if (TREE_CODE (ni) == INTEGER_CST)
2066 return ni;
2067 else
2069 tree ni_name, var;
2070 gimple_seq stmts = NULL;
2071 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2073 var = create_tmp_var (TREE_TYPE (ni), "niters");
2074 ni_name = force_gimple_operand (ni, &stmts, false, var);
2075 if (stmts)
2077 gsi_insert_seq_on_edge_immediate (pe, stmts);
2078 if (new_var_p != NULL)
2079 *new_var_p = true;
2082 return ni_name;
2086 /* Calculate the number of iterations above which the vectorized loop will be
2087 preferred to the scalar loop. NITERS_PROLOG is the number of iterations
2088 of the prolog loop. If it's an integer constant, the value is also passed
2089 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
2090 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
2091 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
2092 threshold below which the scalar (rather than vectorized) loop will be
2093 executed. This function stores the upper bound (inclusive) of the result
2094 in BOUND_SCALAR. */
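/* A small illustration (numbers invented): with CHECK_PROFITABILITY, TH = 16
   (so at most 15 scalar iterations), BOUND_PROLOG = 7 and BOUND_EPILOG = 4,
   an unknown prolog count takes the known_ge branch below since 15 >= 7 + 4,
   so the function returns 15 and sets *BOUND_SCALAR to 15.  */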
2096 static tree
2097 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
2098 int bound_prolog, poly_int64 bound_epilog, int th,
2099 poly_uint64 *bound_scalar,
2100 bool check_profitability)
2102 tree type = TREE_TYPE (niters_prolog);
2103 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
2104 build_int_cst (type, bound_epilog));
2106 *bound_scalar = bound_prolog + bound_epilog;
2107 if (check_profitability)
2109 /* TH indicates the minimum niters of the vectorized loop, while we
2110 compute the maximum niters of the scalar loop. */
2111 th--;
2112 /* Peeling a constant number of times. */
2113 if (int_niters_prolog >= 0)
2115 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
2116 return build_int_cst (type, *bound_scalar);
2118 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
2119 and BOUND_EPILOG are inclusive upper bounds. */
2120 if (known_ge (th, bound_prolog + bound_epilog))
2122 *bound_scalar = th;
2123 return build_int_cst (type, th);
2125 /* Need to do runtime comparison. */
2126 else if (maybe_gt (th, bound_epilog))
2128 *bound_scalar = upper_bound (*bound_scalar, th);
2129 return fold_build2 (MAX_EXPR, type,
2130 build_int_cst (type, th), niters);
2133 return niters;
2136 /* NITERS is the number of times that the original scalar loop executes
2137 after peeling. Work out the maximum number of iterations N that can
2138 be handled by the vectorized form of the loop and then either:
2140 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
2142 niters_vector = N
2144 b) set *STEP_VECTOR_PTR to one and generate:
2146 niters_vector = N / vf
2148 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
2149 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
2150 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
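/* As a concrete illustration (values invented, ignoring PEELING_FOR_GAPS):
   for case b) with a constant VF of 4, the code below emits

     niters_vector = niters >> 2;

   when NITERS is known not to overflow, and

     niters_vector = ((niters - 4) >> 2) + 1;

   otherwise, in both cases with *STEP_VECTOR_PTR set to 1.  */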
2152 void
2153 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
2154 tree *niters_vector_ptr, tree *step_vector_ptr,
2155 bool niters_no_overflow)
2157 tree ni_minus_gap, var;
2158 tree niters_vector, step_vector, type = TREE_TYPE (niters);
2159 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2160 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
2161 tree log_vf = NULL_TREE;
2163 /* If epilogue loop is required because of data accesses with gaps, we
2164 subtract one iteration from the total number of iterations here for
2165 correct calculation of RATIO. */
2166 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2168 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
2169 build_one_cst (type));
2170 if (!is_gimple_val (ni_minus_gap))
2172 var = create_tmp_var (type, "ni_gap");
2173 gimple *stmts = NULL;
2174 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
2175 true, var);
2176 gsi_insert_seq_on_edge_immediate (pe, stmts);
2179 else
2180 ni_minus_gap = niters;
2182 /* To silence some unexpected warnings, simply initialize to 0. */
2183 unsigned HOST_WIDE_INT const_vf = 0;
2184 if (vf.is_constant (&const_vf)
2185 && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
2187 /* Create: niters >> log2(vf) */
2188 /* If it's known that niters == number of latch executions + 1 doesn't
2189 overflow, we can generate niters >> log2(vf); otherwise we generate
2190 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
2191 will be at least one. */
2192 log_vf = build_int_cst (type, exact_log2 (const_vf));
2193 if (niters_no_overflow)
2194 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
2195 else
2196 niters_vector
2197 = fold_build2 (PLUS_EXPR, type,
2198 fold_build2 (RSHIFT_EXPR, type,
2199 fold_build2 (MINUS_EXPR, type,
2200 ni_minus_gap,
2201 build_int_cst (type, vf)),
2202 log_vf),
2203 build_int_cst (type, 1));
2204 step_vector = build_one_cst (type);
2206 else
2208 niters_vector = ni_minus_gap;
2209 step_vector = build_int_cst (type, vf);
2212 if (!is_gimple_val (niters_vector))
2214 var = create_tmp_var (type, "bnd");
2215 gimple_seq stmts = NULL;
2216 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
2217 gsi_insert_seq_on_edge_immediate (pe, stmts);
2218 /* The peeling algorithm guarantees that the vector loop bound is at least
2219 ONE, so we set range information to make the niters analyzer's life easier.
2220 Note the number of latch iterations can be TYPE_MAX_VALUE, so we have to
2221 represent the vector niter as (TYPE_MAX_VALUE + 1) >> log_vf. */
2222 if (stmts != NULL && log_vf)
2224 if (niters_no_overflow)
2226 value_range vr (type,
2227 wi::one (TYPE_PRECISION (type)),
2228 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2229 TYPE_SIGN (type)),
2230 exact_log2 (const_vf),
2231 TYPE_SIGN (type)));
2232 set_range_info (niters_vector, vr);
2234 /* For VF == 1 the vector IV might also overflow so we cannot
2235 assert a minimum value of 1. */
2236 else if (const_vf > 1)
2238 value_range vr (type,
2239 wi::one (TYPE_PRECISION (type)),
2240 wi::rshift (wi::max_value (TYPE_PRECISION (type),
2241 TYPE_SIGN (type))
2242 - (const_vf - 1),
2243 exact_log2 (const_vf), TYPE_SIGN (type))
2244 + 1);
2245 set_range_info (niters_vector, vr);
2249 *niters_vector_ptr = niters_vector;
2250 *step_vector_ptr = step_vector;
2252 return;
2255 /* Given NITERS_VECTOR which is the number of iterations for vectorized
2256 loop specified by LOOP_VINFO after vectorization, compute the number
2257 of iterations before vectorization (niters_vector * vf) and store it
2258 to NITERS_VECTOR_MULT_VF_PTR. */
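/* E.g. (illustrative): with a constant VF of 8 the code below emits
   niters_vector_mult_vf = niters_vector << 3 at the start of the loop's
   exit block.  */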
2260 static void
2261 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2262 tree niters_vector,
2263 tree *niters_vector_mult_vf_ptr)
2265 /* We should be using a step_vector of VF if VF is variable. */
2266 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
2267 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2268 tree type = TREE_TYPE (niters_vector);
2269 tree log_vf = build_int_cst (type, exact_log2 (vf));
2270 basic_block exit_bb = single_exit (loop)->dest;
2272 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2273 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2274 niters_vector, log_vf);
2275 if (!is_gimple_val (niters_vector_mult_vf))
2277 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2278 gimple_seq stmts = NULL;
2279 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2280 &stmts, true, var);
2281 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2282 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2284 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2287 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
2288 this function searches for the corresponding lcssa phi node in exit
2289 bb of LOOP. If it is found, return the phi result; otherwise return
2290 NULL. */
2292 static tree
2293 find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
2294 gphi *lcssa_phi)
2296 gphi_iterator gsi;
2297 edge e = single_exit (loop);
2299 gcc_assert (single_pred_p (e->dest));
2300 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2302 gphi *phi = gsi.phi ();
2303 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2304 PHI_ARG_DEF (lcssa_phi, 0), 0))
2305 return PHI_RESULT (phi);
2307 return NULL_TREE;
2310 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
2311 from SECOND/FIRST and puts it at the original loop's preheader/exit
2312 edge, the two loops are arranged as below:
2314 preheader_a:
2315 first_loop:
2316 header_a:
2317 i_1 = PHI<i_0, i_2>;
2319 i_2 = i_1 + 1;
2320 if (cond_a)
2321 goto latch_a;
2322 else
2323 goto between_bb;
2324 latch_a:
2325 goto header_a;
2327 between_bb:
2328 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2330 second_loop:
2331 header_b:
2332 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2333 or with i_2 if no LCSSA phi is created
2334 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2336 i_4 = i_3 + 1;
2337 if (cond_b)
2338 goto latch_b;
2339 else
2340 goto exit_bb;
2341 latch_b:
2342 goto header_b;
2344 exit_bb:
2346 This function creates loop closed SSA for the first loop; it updates the
2347 second loop's PHI nodes by replacing the argument on the incoming edge with
2348 the result of newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
2349 is false, loop closed ssa phis will only be created for non-iv phis of
2350 the first loop.
2352 This function assumes the exit bb of the first loop is the preheader bb of
2353 the second loop, i.e., between_bb in the example code. With PHIs updated,
2354 the second loop will execute the remaining iterations of the first. */
2356 static void
2357 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2358 class loop *first, class loop *second,
2359 bool create_lcssa_for_iv_phis)
2361 gphi_iterator gsi_update, gsi_orig;
2362 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2364 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2365 edge second_preheader_e = loop_preheader_edge (second);
2366 basic_block between_bb = single_exit (first)->dest;
2368 gcc_assert (between_bb == second_preheader_e->src);
2369 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2370 /* Either the first loop or the second is the loop to be vectorized. */
2371 gcc_assert (loop == first || loop == second);
2373 for (gsi_orig = gsi_start_phis (first->header),
2374 gsi_update = gsi_start_phis (second->header);
2375 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2376 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2378 gphi *orig_phi = gsi_orig.phi ();
2379 gphi *update_phi = gsi_update.phi ();
2381 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2382 /* Generate lcssa PHI node for the first loop. */
2383 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
2384 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
2385 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
2387 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2388 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2389 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2390 arg = new_res;
2393 /* Update PHI node in the second loop by replacing arg on the loop's
2394 incoming edge. */
2395 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2398 /* For epilogue peeling we have to make sure to copy all LC PHIs
2399 for correct vectorization of live stmts. */
2400 if (loop == first)
2402 basic_block orig_exit = single_exit (second)->dest;
2403 for (gsi_orig = gsi_start_phis (orig_exit);
2404 !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
2406 gphi *orig_phi = gsi_orig.phi ();
2407 tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
2408 if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
2409 continue;
2411 /* Already created in the above loop. */
2412 if (find_guard_arg (first, second, orig_phi))
2413 continue;
2415 tree new_res = copy_ssa_name (orig_arg);
2416 gphi *lcphi = create_phi_node (new_res, between_bb);
2417 add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
2422 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
2423 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2424 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2425 appear like below:
2427 guard_bb:
2428 if (cond)
2429 goto merge_bb;
2430 else
2431 goto skip_loop;
2433 skip_loop:
2434 header_a:
2435 i_1 = PHI<i_0, i_2>;
2437 i_2 = i_1 + 1;
2438 if (cond_a)
2439 goto latch_a;
2440 else
2441 goto exit_a;
2442 latch_a:
2443 goto header_a;
2445 exit_a:
2446 i_5 = PHI<i_2>;
2448 merge_bb:
2449 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2451 update_loop:
2452 header_b:
2453 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2455 i_4 = i_3 + 1;
2456 if (cond_b)
2457 goto latch_b;
2458 else
2459 goto exit_bb;
2460 latch_b:
2461 goto header_b;
2463 exit_bb:
2465 This function creates PHI nodes at merge_bb and replaces the use of i_5
2466 in the update_loop's PHI node with the result of the new PHI node. */
2468 static void
2469 slpeel_update_phi_nodes_for_guard1 (class loop *skip_loop,
2470 class loop *update_loop,
2471 edge guard_edge, edge merge_edge)
2473 location_t merge_loc, guard_loc;
2474 edge orig_e = loop_preheader_edge (skip_loop);
2475 edge update_e = loop_preheader_edge (update_loop);
2476 gphi_iterator gsi_orig, gsi_update;
2478 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2479 gsi_update = gsi_start_phis (update_loop->header));
2480 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2481 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2483 gphi *orig_phi = gsi_orig.phi ();
2484 gphi *update_phi = gsi_update.phi ();
2486 /* Generate new phi node at merge bb of the guard. */
2487 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2488 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2490 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2491 args in NEW_PHI for these edges. */
2492 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2493 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2494 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2495 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2496 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2497 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2499 /* Update phi in UPDATE_PHI. */
2500 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2504 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2505 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2506 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2507 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2508 The CFG looks like:
2510 loop:
2511 header_a:
2512 i_1 = PHI<i_0, i_2>;
2514 i_2 = i_1 + 1;
2515 if (cond_a)
2516 goto latch_a;
2517 else
2518 goto exit_a;
2519 latch_a:
2520 goto header_a;
2522 exit_a:
2524 guard_bb:
2525 if (cond)
2526 goto merge_bb;
2527 else
2528 goto epilog_loop;
2530 ;; fall_through_bb
2532 epilog_loop:
2533 header_b:
2534 i_3 = PHI<i_2, i_4>;
2536 i_4 = i_3 + 1;
2537 if (cond_b)
2538 goto latch_b;
2539 else
2540 goto merge_bb;
2541 latch_b:
2542 goto header_b;
2544 merge_bb:
2545 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2547 exit_bb:
2548 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2550 For each name used outside EPILOG (i.e., for each name that has an lcssa
2551 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2552 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2553 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2554 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2555 in exit_bb will also be updated. */
2557 static void
2558 slpeel_update_phi_nodes_for_guard2 (class loop *loop, class loop *epilog,
2559 edge guard_edge, edge merge_edge)
2561 gphi_iterator gsi;
2562 basic_block merge_bb = guard_edge->dest;
2564 gcc_assert (single_succ_p (merge_bb));
2565 edge e = single_succ_edge (merge_bb);
2566 basic_block exit_bb = e->dest;
2567 gcc_assert (single_pred_p (exit_bb));
2568 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2570 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2572 gphi *update_phi = gsi.phi ();
2573 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2575 tree merge_arg = NULL_TREE;
2577 /* If the old argument is a SSA_NAME use its current_def. */
2578 if (TREE_CODE (old_arg) == SSA_NAME)
2579 merge_arg = get_current_def (old_arg);
2580 /* If it's a constant or doesn't have a current_def, just use the old
2581 argument. */
2582 if (!merge_arg)
2583 merge_arg = old_arg;
2585 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2586 /* If the var is live after loop but not a reduction, we simply
2587 use the old arg. */
2588 if (!guard_arg)
2589 guard_arg = old_arg;
2591 /* Create new phi node in MERGE_BB: */
2592 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2593 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2595 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
2596 the two PHI args in merge_phi for these edges. */
2597 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2598 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2600 /* Update the original phi in exit_bb. */
2601 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2605 /* The EPILOG loop is duplicated from the original loop for vectorizing;
2606 the arg of each of its loop closed ssa PHIs needs to be updated. */
2608 static void
2609 slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
2611 gphi_iterator gsi;
2612 basic_block exit_bb = single_exit (epilog)->dest;
2614 gcc_assert (single_pred_p (exit_bb));
2615 edge e = EDGE_PRED (exit_bb, 0);
2616 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2617 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2620 /* EPILOGUE_VINFO is an epilogue loop that we now know would need to
2621 iterate exactly CONST_NITERS times. Make a final decision about
2622 whether the epilogue loop should be used, returning true if so. */
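/* For example (illustrative): with CONST_NITERS = 5 and no peeling for gaps,
   the code below installs NITERS = 5 and NITERSM1 = 4 and lets
   vect_determine_partial_vectors_and_peeling make the final call, whereas
   CONST_NITERS = 1 is rejected up front.  */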
2624 static bool
2625 vect_update_epilogue_niters (loop_vec_info epilogue_vinfo,
2626 unsigned HOST_WIDE_INT const_niters)
2628 /* Avoid wrap-around when computing const_niters - 1. Also reject
2629 using an epilogue loop for a single scalar iteration, even if
2630 we could in principle implement that using partial vectors. */
2631 unsigned int gap_niters = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
2632 if (const_niters <= gap_niters + 1)
2633 return false;
2635 /* Install the number of iterations. */
2636 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (epilogue_vinfo));
2637 tree niters_tree = build_int_cst (niters_type, const_niters);
2638 tree nitersm1_tree = build_int_cst (niters_type, const_niters - 1);
2640 LOOP_VINFO_NITERS (epilogue_vinfo) = niters_tree;
2641 LOOP_VINFO_NITERSM1 (epilogue_vinfo) = nitersm1_tree;
2643 /* Decide what to do if the number of epilogue iterations is not
2644 a multiple of the epilogue loop's vectorization factor. */
2645 return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true);
2648 /* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
2649 Return a value that equals:
2651 - MAIN_LOOP_VALUE when LOOP_VINFO is entered from the main loop and
2652 - SKIP_VALUE when the main loop is skipped. */
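/* E.g. (illustrative) the resulting merge looks like

     result = PHI <main_loop_value (main_loop_edge),
                   skip_value (skip_main_loop_edge)>

   in the destination block of main_loop_edge.  */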
2654 tree
2655 vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
2656 tree skip_value)
2658 gcc_assert (loop_vinfo->main_loop_edge);
2660 tree phi_result = make_ssa_name (TREE_TYPE (main_loop_value));
2661 basic_block bb = loop_vinfo->main_loop_edge->dest;
2662 gphi *new_phi = create_phi_node (phi_result, bb);
2663 add_phi_arg (new_phi, main_loop_value, loop_vinfo->main_loop_edge,
2664 UNKNOWN_LOCATION);
2665 add_phi_arg (new_phi, skip_value,
2666 loop_vinfo->skip_main_loop_edge, UNKNOWN_LOCATION);
2667 return phi_result;
2670 /* Function vect_do_peeling.
2672 Input:
2673 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2675 preheader:
2676 LOOP:
2677 header_bb:
2678 loop_body
2679 if (exit_loop_cond) goto exit_bb
2680 else goto header_bb
2681 exit_bb:
2683 - NITERS: The number of iterations of the loop.
2684 - NITERSM1: The number of iterations of the loop's latch.
2685 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2686 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2687 CHECK_PROFITABILITY is true.
2688 Output:
2689 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
2690 iterate after vectorization; see vect_set_loop_condition for details.
2691 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2692 should be set to the number of scalar iterations handled by the
2693 vector loop. The SSA name is only used on exit from the loop.
2695 This function peels prolog and epilog from the loop, adds guards skipping
2696 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2697 would look like:
2699 guard_bb_1:
2700 if (prefer_scalar_loop) goto merge_bb_1
2701 else goto guard_bb_2
2703 guard_bb_2:
2704 if (skip_prolog) goto merge_bb_2
2705 else goto prolog_preheader
2707 prolog_preheader:
2708 PROLOG:
2709 prolog_header_bb:
2710 prolog_body
2711 if (exit_prolog_cond) goto prolog_exit_bb
2712 else goto prolog_header_bb
2713 prolog_exit_bb:
2715 merge_bb_2:
2717 vector_preheader:
2718 VECTOR LOOP:
2719 vector_header_bb:
2720 vector_body
2721 if (exit_vector_cond) goto vector_exit_bb
2722 else goto vector_header_bb
2723 vector_exit_bb:
2725 guard_bb_3:
2726 if (skip_epilog) goto merge_bb_3
2727 else goto epilog_preheader
2729 merge_bb_1:
2731 epilog_preheader:
2732 EPILOG:
2733 epilog_header_bb:
2734 epilog_body
2735 if (exit_epilog_cond) goto merge_bb_3
2736 else goto epilog_header_bb
2738 merge_bb_3:
2740 Note this function peels the prolog and epilog, and adds the guards,
2741 only when necessary.
2742 This function returns the epilogue loop if a decision was made to vectorize
2743 it, otherwise NULL.
2745 The analysis resulting in this epilogue loop's loop_vec_info was performed
2746 in the same vect_analyze_loop call as the main loop's. At that time
2747 vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
2748 vectorization factors than the main loop. This list is stored in the main
2749 loop's loop_vec_info in the 'epilogue_vinfos' member. Every time we decide to
2750 vectorize the epilogue loop for a lower vectorization factor, the
2751 loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
2752 updated and linked to the epilogue loop. This is later used to vectorize
2753 the epilogue. The reason the loop_vec_info needs updating is that it was
2754 constructed based on the original main loop, and the epilogue loop is a
2755 copy of this loop, so all links pointing to statements in the original loop
2756 need updating. Furthermore, these loop_vec_infos share the
2757 data_reference's records, which will also need to be updated.
2759 TODO: Guard for prefer_scalar_loop should be emitted along with
2760 versioning conditions if loop versioning is needed. */
2763 class loop *
2764 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
2765 tree *niters_vector, tree *step_vector,
2766 tree *niters_vector_mult_vf_var, int th,
2767 bool check_profitability, bool niters_no_overflow,
2768 tree *advance)
2770 edge e, guard_e;
2771 tree type = TREE_TYPE (niters), guard_cond;
2772 basic_block guard_bb, guard_to;
2773 profile_probability prob_prolog, prob_vector, prob_epilog;
2774 int estimated_vf;
2775 int prolog_peeling = 0;
2776 bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
2777 bool vect_epilogues_updated_niters = false;
2778 /* We currently do not support prolog peeling if the target alignment is not
2779 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
2780 target alignment being constant. */
2781 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2782 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
2783 return NULL;
2785 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2786 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2788 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2789 poly_uint64 bound_epilog = 0;
2790 if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
2791 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2792 bound_epilog += vf - 1;
2793 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2794 bound_epilog += 1;
2795 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2796 poly_uint64 bound_scalar = bound_epilog;
2798 if (!prolog_peeling && !epilog_peeling)
2799 return NULL;
2801 /* Before doing any peeling make sure to reset debug binds outside of
2802 the loop referring to defs not in LC SSA. */
2803 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2804 for (unsigned i = 0; i < loop->num_nodes; ++i)
2806 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2807 imm_use_iterator ui;
2808 gimple *use_stmt;
2809 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
2810 gsi_next (&gsi))
2812 FOR_EACH_IMM_USE_STMT (use_stmt, ui, gimple_phi_result (gsi.phi ()))
2813 if (gimple_debug_bind_p (use_stmt)
2814 && loop != gimple_bb (use_stmt)->loop_father
2815 && !flow_loop_nested_p (loop,
2816 gimple_bb (use_stmt)->loop_father))
2818 gimple_debug_bind_reset_value (use_stmt);
2819 update_stmt (use_stmt);
2822 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2823 gsi_next (&gsi))
2825 ssa_op_iter op_iter;
2826 def_operand_p def_p;
2827 FOR_EACH_SSA_DEF_OPERAND (def_p, gsi_stmt (gsi), op_iter, SSA_OP_DEF)
2828 FOR_EACH_IMM_USE_STMT (use_stmt, ui, DEF_FROM_PTR (def_p))
2829 if (gimple_debug_bind_p (use_stmt)
2830 && loop != gimple_bb (use_stmt)->loop_father
2831 && !flow_loop_nested_p (loop,
2832 gimple_bb (use_stmt)->loop_father))
2834 gimple_debug_bind_reset_value (use_stmt);
2835 update_stmt (use_stmt);
2840 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
2841 estimated_vf = vect_vf_for_cost (loop_vinfo);
2842 if (estimated_vf == 2)
2843 estimated_vf = 3;
2844 prob_prolog = prob_epilog = profile_probability::guessed_always ()
2845 .apply_scale (estimated_vf - 1, estimated_vf);
2847 class loop *prolog, *epilog = NULL;
2848 class loop *first_loop = loop;
2849 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
2851 /* SSA form needs to be up-to-date since we are going to manually
2852 update SSA form in slpeel_tree_duplicate_loop_to_edge_cfg and delete all
2853 SSA update state after that, so we have to make sure not to lose any
2854 pending update needs. */
2855 gcc_assert (!need_ssa_update_p (cfun));
2857 /* If we're vectorizing an epilogue loop, we have ensured that the
2858 virtual operand is in SSA form throughout the vectorized main loop.
2859 Normally it is possible to trace the updated
2860 vector-stmt vdefs back to scalar-stmt vdefs and vector-stmt vuses
2861 back to scalar-stmt vuses, meaning that the effect of the SSA update
2862 remains local to the main loop. However, there are rare cases in
2863 which the vectorized loop should have vdefs even when the original scalar
2864 loop didn't. For example, vectorizing a load with IFN_LOAD_LANES
2865 introduces clobbers of the temporary vector array, which in turn
2866 needs new vdefs. If the scalar loop doesn't write to memory, these
2867 new vdefs will be the only ones in the vector loop.
2868 We are currently deferring the update of virtual SSA form and the creation
2869 of a virtual PHI for this case, so we do not have to make sure the
2870 newly introduced virtual def is in LCSSA form. */
2872 if (MAY_HAVE_DEBUG_BIND_STMTS)
2874 gcc_assert (!adjust_vec.exists ());
2875 adjust_vec.create (32);
2877 initialize_original_copy_tables ();
2879 /* Record the anchor bb at which the guard should be placed if the scalar
2880 loop might be preferred. */
2881 basic_block anchor = loop_preheader_edge (loop)->src;
2883 /* Generate the number of iterations for the prolog loop. We do this here
2884 so that we can also get the upper bound on the number of iterations. */
2885 tree niters_prolog;
2886 int bound_prolog = 0;
2887 if (prolog_peeling)
2888 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
2889 &bound_prolog);
2890 else
2891 niters_prolog = build_int_cst (type, 0);
2893 loop_vec_info epilogue_vinfo = NULL;
2894 if (vect_epilogues)
2896 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
2897 loop_vinfo->epilogue_vinfos.ordered_remove (0);
2900 tree niters_vector_mult_vf = NULL_TREE;
2901 /* Save NITERS before the loop, as it may be changed by the prologue. */
2902 tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
2903 edge update_e = NULL, skip_e = NULL;
2904 unsigned int lowest_vf = constant_lower_bound (vf);
2905 /* If we know the number of scalar iterations for the main loop we should
2906 check whether after the main loop there are enough iterations left over
2907 for the epilogue. */
2908 if (vect_epilogues
2909 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2910 && prolog_peeling >= 0
2911 && known_eq (vf, lowest_vf))
2913 unsigned HOST_WIDE_INT eiters
2914 = (LOOP_VINFO_INT_NITERS (loop_vinfo)
2915 - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
2917 eiters -= prolog_peeling;
2918 eiters
2919 = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
2921 while (!vect_update_epilogue_niters (epilogue_vinfo, eiters))
2923 delete epilogue_vinfo;
2924 epilogue_vinfo = NULL;
2925 if (loop_vinfo->epilogue_vinfos.length () == 0)
2927 vect_epilogues = false;
2928 break;
2930 epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
2931 loop_vinfo->epilogue_vinfos.ordered_remove (0);
2933 vect_epilogues_updated_niters = true;
2935 /* Prolog loop may be skipped. */
2936 bool skip_prolog = (prolog_peeling != 0);
2937 /* Skip this loop to the epilog when there are not enough iterations to
2938 enter the vectorized loop. If true we should perform runtime checks on the
2939 NITERS to check whether we should skip the current vectorized loop. If we
2940 know the number of scalar iterations, we may choose to add a runtime check
2941 when this number may potentially be smaller than the number of iterations
2942 required to enter the vectorized loop; for this we use the upper bounds on
2943 the prolog and epilog peeling. When we
2944 don't know the number of iterations and don't require versioning it is
2945 because we have asserted that there are enough scalar iterations to enter
2946 the main loop, so this skip is not necessary. When we are versioning then
2947 we only add such a skip if we have chosen to vectorize the epilogue. */
2949 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2950 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2951 bound_prolog + bound_epilog)
2952 : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2953 || vect_epilogues));
2954 /* Epilog loop must be executed if the number of iterations for epilog
2955 loop is known at compile time, otherwise we need to add a check at
2956 the end of vector loop and skip to the end of epilog loop. */
2957 bool skip_epilog = (prolog_peeling < 0
2958 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2959 || !vf.is_constant ());
2960 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
2961 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2962 skip_epilog = false;
2964 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2965 auto_vec<profile_count> original_counts;
2966 basic_block *original_bbs = NULL;
2968 if (skip_vector)
2970 split_edge (loop_preheader_edge (loop));
2972 if (epilog_peeling && (vect_epilogues || scalar_loop == NULL))
2974 original_bbs = get_loop_body (loop);
2975 for (unsigned int i = 0; i < loop->num_nodes; i++)
2976 original_counts.safe_push(original_bbs[i]->count);
2979 /* Due to the order in which we peel prolog and epilog, we first
2980 propagate probability to the whole loop. The purpose is to
2981 avoid adjusting probabilities of both prolog and vector loops
2982 separately. Note in this case, the probability of epilog loop
2983 needs to be scaled back later. */
2984 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
2985 if (prob_vector.initialized_p ())
2987 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
2988 scale_loop_profile (loop, prob_vector, 0);
2992 if (vect_epilogues)
2993 /* Make sure to set the epilogue's epilogue scalar loop, such that we can
2994 use the original scalar loop as remaining epilogue if necessary. */
2995 LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
2996 = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2998 if (prolog_peeling)
3000 e = loop_preheader_edge (loop);
3001 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e));
3003 /* Peel prolog and put it on preheader edge of loop. */
3004 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
3005 gcc_assert (prolog);
3006 prolog->force_vectorize = false;
3007 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
3008 first_loop = prolog;
3009 reset_original_copy_tables ();
3011 /* Update the number of iterations for prolog loop. */
3012 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
3013 vect_set_loop_condition (prolog, NULL, niters_prolog,
3014 step_prolog, NULL_TREE, false);
3016 /* Skip the prolog loop. */
3017 if (skip_prolog)
3019 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3020 niters_prolog, build_int_cst (type, 0));
3021 guard_bb = loop_preheader_edge (prolog)->src;
3022 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
3023 guard_to = split_edge (loop_preheader_edge (loop));
3024 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3025 guard_to, guard_bb,
3026 prob_prolog.invert (),
3027 irred_flag);
3028 e = EDGE_PRED (guard_to, 0);
3029 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3030 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
3032 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
3033 scale_loop_profile (prolog, prob_prolog, bound_prolog);
3036 /* Update init address of DRs. */
3037 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
3038 /* Update niters for vector loop. */
3039 LOOP_VINFO_NITERS (loop_vinfo)
3040 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
3041 LOOP_VINFO_NITERSM1 (loop_vinfo)
3042 = fold_build2 (MINUS_EXPR, type,
3043 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3044 bool new_var_p = false;
3045 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
3046 /* It's guaranteed that vector loop bound before vectorization is at
3047 least VF, so set range information for newly generated var. */
3048 if (new_var_p)
3050 value_range vr (type,
3051 wi::to_wide (build_int_cst (type, lowest_vf)),
3052 wi::to_wide (TYPE_MAX_VALUE (type)));
3053 set_range_info (niters, vr);
3056 /* Prolog iterates at most bound_prolog times, latch iterates at
3057 most bound_prolog - 1 times. */
3058 record_niter_bound (prolog, bound_prolog - 1, false, true);
3059 delete_update_ssa ();
3060 adjust_vec_debug_stmts ();
3061 scev_reset ();
3064 if (epilog_peeling)
3066 e = single_exit (loop);
3067 gcc_checking_assert (slpeel_can_duplicate_loop_p (loop, e));
3069 /* Peel epilog and put it on exit edge of loop. If we are vectorizing
3070 said epilog then we should use a copy of the main loop as a starting
3071 point. This loop may have already had some preliminary transformations
3072 to allow for more optimal vectorization, for example if-conversion.
3073 If we are not vectorizing the epilog then we should use the scalar loop
3074 as the transformations mentioned above make little or no sense when not
3075 vectorizing. */
3076 epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
3077 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
3078 gcc_assert (epilog);
3080 epilog->force_vectorize = false;
3081 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
3083 /* The scalar version of the loop may be preferred. In this case, add a
3084 guard and skip to the epilog. Note this only happens when the number of
3085 iterations of the loop is unknown at compile time, otherwise this
3086 won't be vectorized. */
3087 if (skip_vector)
3089 /* Additional epilogue iteration is peeled if gap exists. */
3090 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
3091 bound_prolog, bound_epilog,
3092 th, &bound_scalar,
3093 check_profitability);
3094 /* Build guard against NITERSM1 since NITERS may overflow. */
3095 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
3096 guard_bb = anchor;
3097 guard_to = split_edge (loop_preheader_edge (epilog));
3098 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
3099 guard_to, guard_bb,
3100 prob_vector.invert (),
3101 irred_flag);
3102 skip_e = guard_e;
3103 e = EDGE_PRED (guard_to, 0);
3104 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
3105 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
3107 /* Simply propagate profile info from guard_bb to guard_to which is
3108 a merge point of control flow. */
3109 guard_to->count = guard_bb->count;
3111 /* Restore the counts of the epilog loop if we didn't use the scalar loop. */
3112 if (vect_epilogues || scalar_loop == NULL)
3114 gcc_assert(epilog->num_nodes == loop->num_nodes);
3115 basic_block *bbs = get_loop_body (epilog);
3116 for (unsigned int i = 0; i < epilog->num_nodes; i++)
3118 gcc_assert(get_bb_original (bbs[i]) == original_bbs[i]);
3119 bbs[i]->count = original_counts[i];
3121 free (bbs);
3122 free (original_bbs);
3126 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
3127 /* If the loop is peeled a non-zero constant number of times, niters now
3128 refers to orig_niters - prolog_peeling; it won't overflow even if
3129 orig_niters overflows. */
3130 niters_no_overflow |= (prolog_peeling > 0);
3131 vect_gen_vector_loop_niters (loop_vinfo, niters,
3132 niters_vector, step_vector,
3133 niters_no_overflow);
3134 if (!integer_onep (*step_vector))
3136 /* On exit from the loop we will have an easy way of calculating
3137 NITERS_VECTOR / STEP * STEP. Install a dummy definition
3138 until then. */
3139 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
3140 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
3141 *niters_vector_mult_vf_var = niters_vector_mult_vf;
3143 else
3144 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
3145 &niters_vector_mult_vf);
3146 /* Update IVs of original loop as if they were advanced by
3147 niters_vector_mult_vf steps. */
3148 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
3149 update_e = skip_vector ? e : loop_preheader_edge (epilog);
3150 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
3151 update_e);
3153 if (skip_epilog)
3155 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
3156 niters, niters_vector_mult_vf);
3157 guard_bb = single_exit (loop)->dest;
3158 guard_to = split_edge (single_exit (epilog));
3159 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
3160 skip_vector ? anchor : guard_bb,
3161 prob_epilog.invert (),
3162 irred_flag);
3163 if (vect_epilogues)
3164 epilogue_vinfo->skip_this_loop_edge = guard_e;
3165 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
3166 single_exit (epilog));
3167 /* Only need to handle basic block before epilog loop if it's not
3168 the guard_bb, which is the case when skip_vector is true. */
3169 if (guard_bb != bb_before_epilog)
3171 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
3173 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
3175 scale_loop_profile (epilog, prob_epilog, 0);
3177 else
3178 slpeel_update_phi_nodes_for_lcssa (epilog);
3180 unsigned HOST_WIDE_INT bound;
3181 if (bound_scalar.is_constant (&bound))
3183 gcc_assert (bound != 0);
3184 /* -1 to convert loop iterations to latch iterations. */
3185 record_niter_bound (epilog, bound - 1, false, true);
3188 delete_update_ssa ();
3189 adjust_vec_debug_stmts ();
3190 scev_reset ();
3193 if (vect_epilogues)
3195 epilog->aux = epilogue_vinfo;
3196 LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
3198 loop_constraint_clear (epilog, LOOP_C_INFINITE);
3200 /* We now must calculate the number of NITERS performed by the previous
3201 loop and EPILOGUE_NITERS to be performed by the epilogue. */
3202 tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
3203 niters_prolog, niters_vector_mult_vf);
3205 /* If skip_vector, we may skip the previous loop; we insert a phi-node to
3206 determine whether we are coming from the previous vectorized loop
3207 using the update_e edge or from the skip_vector basic block using the
3208 skip_e edge. */
3209 if (skip_vector)
3211 gcc_assert (update_e != NULL
3212 && skip_e != NULL
3213 && !vect_epilogues_updated_niters);
3214 gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
3215 update_e->dest);
3216 tree new_ssa = make_ssa_name (TREE_TYPE (niters));
3217 gimple *stmt = gimple_build_assign (new_ssa, niters);
3218 gimple_stmt_iterator gsi;
3219 if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
3220 && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
3222 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
3223 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
3225 else
3227 gsi = gsi_last_bb (update_e->src);
3228 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
3231 niters = new_ssa;
3232 add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
3233 add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
3234 UNKNOWN_LOCATION);
3235 niters = PHI_RESULT (new_phi);
3236 epilogue_vinfo->main_loop_edge = update_e;
3237 epilogue_vinfo->skip_main_loop_edge = skip_e;
3240 /* Set ADVANCE to the number of iterations performed by the previous
3241 loop and its prologue. */
3242 *advance = niters;
3244 if (!vect_epilogues_updated_niters)
3246 /* Subtract the number of iterations performed by the vectorized loop
3247 from the number of total iterations. */
3248 tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
3249 before_loop_niters,
3250 niters);
3252 LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
3253 LOOP_VINFO_NITERSM1 (epilogue_vinfo)
3254 = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
3255 epilogue_niters,
3256 build_one_cst (TREE_TYPE (epilogue_niters)));
3258 /* Decide what to do if the number of epilogue iterations is not
3259 a multiple of the epilogue loop's vectorization factor.
3260 We should have rejected the loop during the analysis phase
3261 if this fails. */
3262 if (!vect_determine_partial_vectors_and_peeling (epilogue_vinfo,
3263 true))
3264 gcc_unreachable ();
3268 adjust_vec.release ();
3269 free_original_copy_tables ();
3271 return vect_epilogues ? epilog : NULL;
3274 /* Function vect_create_cond_for_niters_checks.
3276 Create a conditional expression that represents the run-time checks for
3277 the loop's niters. The loop is guaranteed to terminate if the run-time
3278 checks hold.
3280 Input:
3281 COND_EXPR - input conditional expression. New conditions will be chained
3282 with logical AND operation. If it is NULL, the niters
3283 assumptions are used as the condition on their own.
3284 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
3285 to be checked.
3287 Output:
3288 COND_EXPR - conditional expression.
3290 The returned COND_EXPR is the conditional expression to be used in the
3291 if statement that controls which version of the loop gets executed at
3292 runtime. */
3294 static void
3295 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
3297 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
3299 if (*cond_expr)
3300 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3301 *cond_expr, part_cond_expr);
3302 else
3303 *cond_expr = part_cond_expr;
3306 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3307 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
3309 static void
3310 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
3312 if (*cond_expr)
3313 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3314 *cond_expr, part_cond_expr);
3315 else
3316 *cond_expr = part_cond_expr;
3319 /* Function vect_create_cond_for_align_checks.
3321 Create a conditional expression that represents the alignment checks for
3322 all of data references (array element references) whose alignment must be
3323 checked at runtime.
3325 Input:
3326 COND_EXPR - input conditional expression. New conditions will be chained
3327 with logical AND operation.
3328 LOOP_VINFO - two fields of the loop information are used.
3329 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3330 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3332 Output:
3333 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3334 expression.
3335 The returned value is the conditional expression to be used in the if
3336 statement that controls which version of the loop gets executed at runtime.
3338 The algorithm makes two assumptions:
3339 1) The number of bytes "n" in a vector is a power of 2.
3340 2) An address "a" is aligned if a%n is zero, and this
3341 test can be done as a&(n-1) == 0. For example, for 16
3342 byte vectors the test is a&0xf == 0. */
3344 static void
3345 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
3346 tree *cond_expr,
3347 gimple_seq *cond_expr_stmt_list)
3349 const vec<stmt_vec_info> &may_misalign_stmts
3350 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3351 stmt_vec_info stmt_info;
3352 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
3353 tree mask_cst;
3354 unsigned int i;
3355 tree int_ptrsize_type;
3356 char tmp_name[20];
3357 tree or_tmp_name = NULL_TREE;
3358 tree and_tmp_name;
3359 gimple *and_stmt;
3360 tree ptrsize_zero;
3361 tree part_cond_expr;
3363 /* Check that mask is one less than a power of 2, i.e., mask is
3364 all zeros followed by all ones. */
3365 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
3367 int_ptrsize_type = signed_type_for (ptr_type_node);
3369 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
3370 of the first vector of the i'th data reference. */
3372 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3374 gimple_seq new_stmt_list = NULL;
3375 tree addr_base;
3376 tree addr_tmp_name;
3377 tree new_or_tmp_name;
3378 gimple *addr_stmt, *or_stmt;
3379 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3380 bool negative = tree_int_cst_compare
3381 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
3382 tree offset = negative
3383 ? size_int ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
3384 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))))
3385 : size_zero_node;
3387 /* create: addr_tmp = (int)(address_of_first_vector) */
3388 addr_base =
3389 vect_create_addr_base_for_vector_ref (loop_vinfo,
3390 stmt_info, &new_stmt_list,
3391 offset);
3392 if (new_stmt_list != NULL)
3393 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
3395 sprintf (tmp_name, "addr2int%d", i);
3396 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3397 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
3398 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
3400 /* The addresses are ORed together. */
3402 if (or_tmp_name != NULL_TREE)
3404 /* create: or_tmp = or_tmp | addr_tmp */
3405 sprintf (tmp_name, "orptrs%d", i);
3406 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
3407 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
3408 or_tmp_name, addr_tmp_name);
3409 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
3410 or_tmp_name = new_or_tmp_name;
3412 else
3413 or_tmp_name = addr_tmp_name;
3415 } /* end for i */
3417 mask_cst = build_int_cst (int_ptrsize_type, mask);
3419 /* create: and_tmp = or_tmp & mask */
3420 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
3422 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
3423 or_tmp_name, mask_cst);
3424 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
3426 /* Make and_tmp the left operand of the conditional test against zero.
3427 If and_tmp has a nonzero bit then some address is unaligned. */
3428 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
3429 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
3430 and_tmp_name, ptrsize_zero);
3431 chain_cond_expr (cond_expr, part_cond_expr);
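/* Illustrative sketch (added for exposition, not part of the vectorizer):
   the run-time test built above, written as plain C.  All candidate
   addresses are ORed together and then masked; every address is aligned
   iff no low bit survives.  The 16-byte vector size here is an assumption
   for the example only (LOOP_VINFO_PTR_MASK would then be 0xf).  */
#if 0
#include <stdint.h>

static int
all_aligned_p (const void *a, const void *b, const void *c)
{
  const uintptr_t mask = 16 - 1;	/* one less than a power of 2 */
  uintptr_t or_tmp = (uintptr_t) a | (uintptr_t) b | (uintptr_t) c;
  /* Equivalent to a % 16 == 0 && b % 16 == 0 && c % 16 == 0.  */
  return (or_tmp & mask) == 0;
}
#endif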
3434 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
3435 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
3436 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
3437 and this new condition are true. Treat a null *COND_EXPR as "true". */
3439 static void
3440 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
3442 const vec<vec_object_pair> &pairs
3443 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
3444 unsigned int i;
3445 vec_object_pair *pair;
3446 FOR_EACH_VEC_ELT (pairs, i, pair)
3448 tree addr1 = build_fold_addr_expr (pair->first);
3449 tree addr2 = build_fold_addr_expr (pair->second);
3450 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
3451 addr1, addr2);
3452 chain_cond_expr (cond_expr, part_cond_expr);
3456 /* Create an expression that is true when all lower-bound conditions for
3457 the vectorized loop are met. Chain this condition with *COND_EXPR. */
3459 static void
3460 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
3462 const vec<vec_lower_bound> &lower_bounds
3463 = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3464 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3466 tree expr = lower_bounds[i].expr;
3467 tree type = unsigned_type_for (TREE_TYPE (expr));
3468 expr = fold_convert (type, expr);
3469 poly_uint64 bound = lower_bounds[i].min_value;
3470 if (!lower_bounds[i].unsigned_p)
3472 expr = fold_build2 (PLUS_EXPR, type, expr,
3473 build_int_cstu (type, bound - 1));
3474 bound += bound - 1;
3476 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
3477 build_int_cstu (type, bound));
3478 chain_cond_expr (cond_expr, part_cond_expr);
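/* Illustrative sketch (added for exposition, not part of the vectorizer):
   the unsigned trick used above, written as plain C for a 64-bit
   expression.  For a possibly-negative EXPR the recorded lower bound is
   effectively a bound on abs (expr), and the loop above builds that test
   branchlessly as "(unsigned) expr + (min - 1) >= 2 * min - 1".  MIN is
   assumed to be small relative to the type, as the vectorizer's bounds are.  */
#if 0
#include <stdint.h>

static int
abs_ge_p (int64_t expr, uint64_t min)
{
  return (uint64_t) expr + (min - 1) >= 2 * min - 1;
}

/* abs_ge_p (16, 16) and abs_ge_p (-16, 16) hold;
   abs_ge_p (15, 16) and abs_ge_p (-15, 16) do not.  */
#endif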
3482 /* Function vect_create_cond_for_alias_checks.
3484 Create a conditional expression that represents the run-time checks for
3485 overlapping of address ranges represented by a list of data references
3486 relations passed as input.
3488 Input:
3489 COND_EXPR - input conditional expression. New conditions will be chained
3490 with logical AND operation. If it is NULL, the alias checks
3491 become the condition on their own.
3492 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
3493 pairs to be checked.
3495 Output:
3496 COND_EXPR - conditional expression.
3498 The returned COND_EXPR is the conditional expression to be used in the if
3499 statement that controls which version of the loop gets executed at runtime.
3502 void
3503 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
3505 const vec<dr_with_seg_len_pair_t> &comp_alias_ddrs =
3506 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3508 if (comp_alias_ddrs.is_empty ())
3509 return;
3511 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
3512 &comp_alias_ddrs, cond_expr);
3513 if (dump_enabled_p ())
3514 dump_printf_loc (MSG_NOTE, vect_location,
3515 "created %u versioning for alias checks.\n",
3516 comp_alias_ddrs.length ());
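/* Illustrative sketch (added for exposition, not part of the vectorizer):
   the rough shape of one run-time alias check, written as plain C.  Two
   accesses covering SEG_A and SEG_B bytes starting at A and B are
   independent if one segment ends before the other begins.
   create_runtime_alias_checks emits several specialised variants of this
   idea; the sketch below only shows the generic form and ignores
   address-space wrap-around.  */
#if 0
#include <stdint.h>

static int
segments_independent_p (const char *a, uint64_t seg_a,
			const char *b, uint64_t seg_b)
{
  return (uintptr_t) a + seg_a <= (uintptr_t) b
	 || (uintptr_t) b + seg_b <= (uintptr_t) a;
}
#endif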
3520 /* Function vect_loop_versioning.
3522 If the loop has data references that may or may not be aligned and/or
3523 has data reference relations whose independence was not proven, then
3524 two versions of the loop need to be generated, one which is vectorized
3525 and one which isn't. A test is then generated to control which of the
3526 loops is executed. The test checks for the alignment of all of the
3527 data references that may or may not be aligned. An additional
3528 sequence of runtime tests is generated for each pair of DDRs whose
3529 independence was not proven. The vectorized version of the loop is
3530 executed only if both the alias and alignment tests pass.
3532 The test generated to check which version of loop is executed
3533 is modified to also check for profitability as indicated by the
3534 cost model threshold TH.
3536 The versioning precondition(s) are placed in *COND_EXPR and
3537 *COND_EXPR_STMT_LIST. */
3539 class loop *
3540 vect_loop_versioning (loop_vec_info loop_vinfo,
3541 gimple *loop_vectorized_call)
3543 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
3544 class loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
3545 basic_block condition_bb;
3546 gphi_iterator gsi;
3547 gimple_stmt_iterator cond_exp_gsi;
3548 basic_block merge_bb;
3549 basic_block new_exit_bb;
3550 edge new_exit_e, e;
3551 gphi *orig_phi, *new_phi;
3552 tree cond_expr = NULL_TREE;
3553 gimple_seq cond_expr_stmt_list = NULL;
3554 tree arg;
3555 profile_probability prob = profile_probability::likely ();
3556 gimple_seq gimplify_stmt_list = NULL;
3557 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
3558 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3559 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
3560 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
3561 poly_uint64 versioning_threshold
3562 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
3563 tree version_simd_if_cond
3564 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
3565 unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
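  /* Note: SCALAR_LOOP_ITERS above is LOOP_VINFO_NITERSM1, i.e. the
     iteration count minus one, so the ">= th - 1" and
     ">= versioning_threshold - 1" comparisons below implement the checks
     "niters >= th" and "niters >= versioning_threshold".  */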
3567 if (vect_apply_runtime_profitability_check_p (loop_vinfo)
3568 && !ordered_p (th, versioning_threshold))
3569 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3570 build_int_cst (TREE_TYPE (scalar_loop_iters),
3571 th - 1));
3572 if (maybe_ne (versioning_threshold, 0U))
3574 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3575 build_int_cst (TREE_TYPE (scalar_loop_iters),
3576 versioning_threshold - 1));
3577 if (cond_expr)
3578 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3579 expr, cond_expr);
3580 else
3581 cond_expr = expr;
3584 tree cost_name = NULL_TREE;
3585 profile_probability prob2 = profile_probability::uninitialized ();
3586 if (cond_expr
3587 && EXPR_P (cond_expr)
3588 && (version_niter
3589 || version_align
3590 || version_alias
3591 || version_simd_if_cond))
3593 cost_name = cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3594 &cond_expr_stmt_list,
3595 is_gimple_val, NULL_TREE);
3596 /* Split prob () into two so that the overall probability of passing
3597 both the cost-model and versioning checks is the orig prob. */
3598 prob2 = prob.split (prob);
3601 if (version_niter)
3602 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3604 if (cond_expr)
3606 gimple_seq tem = NULL;
3607 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3608 &tem, is_gimple_condexpr_for_cond,
3609 NULL_TREE);
3610 gimple_seq_add_seq (&cond_expr_stmt_list, tem);
3613 if (version_align)
3614 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3615 &cond_expr_stmt_list);
3617 if (version_alias)
3619 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
3620 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
3621 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3624 if (version_simd_if_cond)
3626 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3627 if (flag_checking)
3628 if (basic_block bb
3629 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3630 gcc_assert (bb != loop->header
3631 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3632 && (scalar_loop == NULL
3633 || (bb != scalar_loop->header
3634 && dominated_by_p (CDI_DOMINATORS,
3635 scalar_loop->header, bb))));
3636 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3637 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3638 version_simd_if_cond, zero);
3639 if (cond_expr)
3640 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3641 c, cond_expr);
3642 else
3643 cond_expr = c;
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_NOTE, vect_location,
3646 "created versioning for simd if condition check.\n");
3649 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3650 &gimplify_stmt_list,
3651 is_gimple_condexpr_for_cond, NULL_TREE);
3652 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
3654 /* Compute the outermost loop that cond_expr and cond_expr_stmt_list are
3655 invariant in. */
3656 class loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
3657 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3658 !gsi_end_p (gsi); gsi_next (&gsi))
3660 gimple *stmt = gsi_stmt (gsi);
3661 update_stmt (stmt);
3662 ssa_op_iter iter;
3663 use_operand_p use_p;
3664 basic_block def_bb;
3665 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3666 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3667 && flow_bb_inside_loop_p (outermost, def_bb))
3668 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3671 /* Search for the outermost loop we can version. Avoid versioning of
3672 non-perfect nests but allow if-conversion versioned loops inside. */
3673 class loop *loop_to_version = loop;
3674 if (flow_loop_nested_p (outermost, loop))
3676 if (dump_enabled_p ())
3677 dump_printf_loc (MSG_NOTE, vect_location,
3678 "trying to apply versioning to outer loop %d\n",
3679 outermost->num);
3680 if (outermost->num == 0)
3681 outermost = superloop_at_depth (loop, 1);
3682 /* And avoid applying versioning on non-perfect nests. */
3683 while (loop_to_version != outermost
3684 && (e = single_exit (loop_outer (loop_to_version)))
3685 && !(e->flags & EDGE_COMPLEX)
3686 && (!loop_outer (loop_to_version)->inner->next
3687 || vect_loop_vectorized_call (loop_to_version))
3688 && (!loop_outer (loop_to_version)->inner->next
3689 || !loop_outer (loop_to_version)->inner->next->next))
3690 loop_to_version = loop_outer (loop_to_version);
3693 /* Apply versioning. If there is already a scalar version created by
3694 if-conversion, re-use that. Note we cannot re-use the copy of
3695 an if-converted outer-loop when vectorizing the inner loop only. */
3696 gcond *cond;
3697 if ((!loop_to_version->inner || loop == loop_to_version)
3698 && loop_vectorized_call)
3700 gcc_assert (scalar_loop);
3701 condition_bb = gimple_bb (loop_vectorized_call);
3702 cond = as_a <gcond *> (*gsi_last_bb (condition_bb));
3703 gimple_cond_set_condition_from_tree (cond, cond_expr);
3704 update_stmt (cond);
3706 if (cond_expr_stmt_list)
3708 cond_exp_gsi = gsi_for_stmt (loop_vectorized_call);
3709 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3710 GSI_SAME_STMT);
3713 /* if-conversion uses profile_probability::always () for both paths;
3714 reset the probabilities of the two paths appropriately. */
3715 edge te, fe;
3716 extract_true_false_edges_from_block (condition_bb, &te, &fe);
3717 te->probability = prob;
3718 fe->probability = prob.invert ();
3719 /* We can scale the loop counts immediately but have to postpone
3720 scaling the scalar loop because we re-use it during peeling. */
3721 scale_loop_frequencies (loop_to_version, te->probability);
3722 LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo) = fe->probability;
3724 nloop = scalar_loop;
3725 if (dump_enabled_p ())
3726 dump_printf_loc (MSG_NOTE, vect_location,
3727 "reusing %sloop version created by if conversion\n",
3728 loop_to_version != loop ? "outer " : "");
3730 else
3732 if (loop_to_version != loop
3733 && dump_enabled_p ())
3734 dump_printf_loc (MSG_NOTE, vect_location,
3735 "applying loop versioning to outer loop %d\n",
3736 loop_to_version->num);
3738 unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
3740 initialize_original_copy_tables ();
3741 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
3742 prob, prob.invert (), prob, prob.invert (), true);
3743 gcc_assert (nloop);
3744 nloop = get_loop_copy (loop);
3746 /* For cycle vectorization with SLP we rely on the PHI arguments
3747 appearing in the same order as the SLP node operands, which for the
3748 loop PHI nodes means the preheader edge dest index needs to remain
3749 the same for the analyzed loop, which also becomes the vectorized one.
3750 Make it so in case the state after versioning differs, by redirecting
3751 the first edge into the header to the same destination, which moves
3752 it last. */
3753 if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
3755 edge e = EDGE_PRED (loop->header, 0);
3756 ssa_redirect_edge (e, e->dest);
3757 flush_pending_stmts (e);
3759 gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
3761 /* Kill off IFN_LOOP_VECTORIZED calls in the copy; nobody will
3762 reap those otherwise, and they also refer to the original
3763 loops. */
3764 class loop *l = loop;
3765 while (gimple *call = vect_loop_vectorized_call (l))
3767 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
3768 fold_loop_internal_call (call, boolean_false_node);
3769 l = loop_outer (l);
3771 free_original_copy_tables ();
3773 if (cond_expr_stmt_list)
3775 cond_exp_gsi = gsi_last_bb (condition_bb);
3776 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3777 GSI_SAME_STMT);
3780 /* Loop versioning violates an assumption we try to maintain during
3781 vectorization - that the loop exit block has a single predecessor.
3782 After versioning, the exit block of both loop versions is the same
3783 basic block (i.e. it has two predecessors). Just in order to simplify
3784 following transformations in the vectorizer, we fix this situation
3785 here by adding a new (empty) block on the exit-edge of the loop,
3786 with the proper loop-exit phis to maintain loop-closed-form.
3787 If loop versioning wasn't done from loop, but from scalar_loop instead,
3788 merge_bb will already have just a single predecessor. */
3790 merge_bb = single_exit (loop_to_version)->dest;
3791 if (EDGE_COUNT (merge_bb->preds) >= 2)
3793 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3794 new_exit_bb = split_edge (single_exit (loop_to_version));
3795 new_exit_e = single_exit (loop_to_version);
3796 e = EDGE_SUCC (new_exit_bb, 0);
3798 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
3799 gsi_next (&gsi))
3801 tree new_res;
3802 orig_phi = gsi.phi ();
3803 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3804 new_phi = create_phi_node (new_res, new_exit_bb);
3805 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3806 add_phi_arg (new_phi, arg, new_exit_e,
3807 gimple_phi_arg_location_from_edge (orig_phi, e));
3808 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3812 update_ssa (TODO_update_ssa_no_phi);
3815 /* Split the cost model check off to a separate BB. Costing assumes
3816 this is the only thing we perform when we enter the scalar loop
3817 from a failed cost decision. */
3818 if (cost_name && TREE_CODE (cost_name) == SSA_NAME)
3820 gimple *def = SSA_NAME_DEF_STMT (cost_name);
3821 gcc_assert (gimple_bb (def) == condition_bb);
3822 /* All uses of the cost check are 'true' after the check we
3823 are going to insert. */
3824 replace_uses_by (cost_name, boolean_true_node);
3825 /* And we're going to build the new single use of it. */
3826 gcond *cond = gimple_build_cond (NE_EXPR, cost_name, boolean_false_node,
3827 NULL_TREE, NULL_TREE);
3828 edge e = split_block (gimple_bb (def), def);
3829 gimple_stmt_iterator gsi = gsi_for_stmt (def);
3830 gsi_insert_after (&gsi, cond, GSI_NEW_STMT);
3831 edge true_e, false_e;
3832 extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
3833 e->flags &= ~EDGE_FALLTHRU;
3834 e->flags |= EDGE_TRUE_VALUE;
3835 edge e2 = make_edge (e->src, false_e->dest, EDGE_FALSE_VALUE);
3836 e->probability = prob2;
3837 e2->probability = prob2.invert ();
3838 set_immediate_dominator (CDI_DOMINATORS, false_e->dest, e->src);
3839 auto_vec<basic_block, 3> adj;
3840 for (basic_block son = first_dom_son (CDI_DOMINATORS, e->dest);
3841 son;
3842 son = next_dom_son (CDI_DOMINATORS, son))
3843 if (EDGE_COUNT (son->preds) > 1)
3844 adj.safe_push (son);
3845 for (auto son : adj)
3846 set_immediate_dominator (CDI_DOMINATORS, son, e->src);
3849 if (version_niter)
3851 /* The versioned loop could be infinite, so we need to clear the existing
3852 niter information, which was copied from the original loop. */
3853 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
3854 vect_free_loop_info_assumptions (nloop);
3857 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
3858 && dump_enabled_p ())
3860 if (version_alias)
3861 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3862 vect_location,
3863 "loop versioned for vectorization because of "
3864 "possible aliasing\n");
3865 if (version_align)
3866 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3867 vect_location,
3868 "loop versioned for vectorization to enhance "
3869 "alignment\n");
3873 return nloop;
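/* Illustrative sketch (added for exposition, not part of the vectorizer):
   at the source level, vect_loop_versioning amounts to the shape below.
   COND stands for the AND of whichever cost-model, alignment, alias,
   niter-assumption and simd-if checks were built above; the guarded copy
   is the one that will later be vectorized, and the other copy ("nloop",
   the return value) stays scalar.  */
#if 0
void
example_versioned_form (int *a, int *b, int n, bool cond)
{
  if (cond)
    for (int i = 0; i < n; i++)	/* version that gets vectorized */
      a[i] = b[i] + 1;
  else
    for (int i = 0; i < n; i++)	/* scalar fallback version */
      a[i] = b[i] + 1;
}
#endif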