/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make LOOP iterate N == (NITERS - STEP) / STEP + 1 times,
   where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP
   times when NITERS is nonzero and (1 << M) / STEP times otherwise,
   where M is the precision of NITERS.

   NITERS_MAYBE_ZERO is true if NITERS can be zero, false if it is known
   to be >= STEP.  In the latter case N is always NITERS / STEP.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
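/* A worked example, with illustrative values not taken from the comment
   above: for STEP == 4 and a 32-bit NITERS, NITERS == 10 gives
   N == (10 - 4) / 4 + 1 == 2 == 10 / 4, while NITERS == 0 wraps around
   and gives N == 2^32 / 4 == 2^30.  */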
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters, tree step,
                                 tree final_iv, bool niters_maybe_zero)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  source_location loop_loc;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  tree init, limit;
  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

         A:
           x = 0;
           do
             {
               ...
               x += 1;
             }
           while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

         B:
           x = 0;
           do
             {
               ...
               x += STEP;
             }
           while (x <= NITERS - STEP);

         so that the loop continues to iterate if x + STEP - 1 < NITERS
         but stops if x + STEP - 1 >= NITERS.

         However, if NITERS is zero, x never hits a value above NITERS - STEP
         before wrapping around.  There are two obvious ways of dealing with
         this:

         - start at STEP - 1 and compare x before incrementing it
         - start at -1 and compare x after incrementing it

         The latter is simpler and is what we use.  The loop in this case
         looks like:

         C:
           x = -1;
           do
             {
               ...
               x += STEP;
             }
           while (x < NITERS - STEP);

         In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
      if (niters_maybe_zero)
        {
          /* Case C.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
          init = build_all_ones_cst (niters_type);
        }
      else
        {
          /* Case B.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
          init = build_zero_cst (niters_type);
        }
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
                                       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
                                       limit, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
                                             indx_after_incr, init);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that each TO->dest PHI argument has its current_def
   set to that of the corresponding FROM->dest argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks (a header and an empty latch)
       for an innermost loop, or 5 basic blocks for an outer loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case in which loop->outer is
     NULL is for the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
              = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
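/* A worked example (illustrative, not from the original comment): a simple
   IV such as {0, +, 4}_1 has evolution part 4, which is loop-invariant and
   not itself a chrec, so it can be advanced; an IV whose step is again a
   chrec, e.g. {0, +, {1, +, 1}_1}_1, is rejected by the checks below.  */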
bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are polynomials of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */
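/* A worked example, with illustrative values not taken from the comment
   above: an IV with initial value 0 and step 4, in a loop with
   NITERS == 100, ends up as 0 + 100 * 4 == 400; that value becomes the
   PHI arg on UPDATE_E for the epilog loop's header PHI.  */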
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of the interleaved group).

   The above formulas assume that VF == number of elements in the vector.
   This may not hold when there are multiple types in the loop.
   In this case, for some data-references in the loop the VF does not
   represent the number of elements that fit in the vector.  Therefore,
   instead of VF we use TYPE_VECTOR_SUBPARTS.  */
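/* A worked example, with illustrative values not taken from the comment
   above: for V4SI accesses (elem_size == 4, VF == 4), target_align == 16
   and a start address misaligned by 8 bytes, misalign_in_elems == 8 / 4
   == 2 and prolog_niters == (4 - 2) & 3 == 2: peeling two scalar
   iterations advances the address by 8 bytes to a 16-byte boundary.  */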
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
        = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_drs ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if a new ssa_var is generated.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        {
          gsi_insert_seq_on_edge_immediate (pe, stmts);
          if (new_var_p != NULL)
            *new_var_p = true;
        }

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred to the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1 is
   the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is the
   threshold below which the scalar (rather than vectorized) loop will be
   executed.  This function stores the upper bound (included) of the result
   in BOUND_SCALAR.  */
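/* A worked example, with illustrative values not taken from the comment
   above: with VFM1 == 3 and an unknown NITERS_PROLOG bounded by
   BOUND_PROLOG == 3, the result is niters_prolog + 3 and
   *BOUND_SCALAR == 6; the profitability threshold TH can only raise
   these values.  */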
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* NITERS is the number of times that the original scalar loop executes
   after peeling.  Work out the maximum number of iterations N that can
   be handled by the vectorized form of the loop and then either:

   a) set *STEP_VECTOR_PTR to the vectorization factor and generate:

        niters_vector = N

   b) set *STEP_VECTOR_PTR to one and generate:

        niters_vector = N / vf

   In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
   any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
   is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).  */
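/* A worked example, with illustrative values not taken from the comment
   above: with vf == 4 and NITERS == 10, variant b) computes
   niters_vector == 10 >> 2 == 2 when NITERS is known not to overflow, or
   ((10 - 4) >> 2) + 1 == 2 otherwise; either way the vector loop runs
   twice and the remaining 2 iterations go to the epilog loop.  */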
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, tree *step_vector_ptr,
                             bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, step_vector, type = TREE_TYPE (niters);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = NULL_TREE;

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  if (1)
    {
      /* Create: niters >> log2(vf).  */
      /* If it's known that niters == number of latch executions + 1 doesn't
         overflow, we can generate niters >> log2(vf); otherwise we generate
         (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
         will be at least one.  */
      log_vf = build_int_cst (type, exact_log2 (vf));
      if (niters_no_overflow)
        niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
      else
        niters_vector
          = fold_build2 (PLUS_EXPR, type,
                         fold_build2 (RSHIFT_EXPR, type,
                                      fold_build2 (MINUS_EXPR, type,
                                                   ni_minus_gap,
                                                   build_int_cst (type, vf)),
                                      log_vf),
                         build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL && log_vf)
        set_range_info (niters_vector, VR_RANGE,
                        wi::to_wide (build_int_cst (type, 1)),
                        wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
                                                  TYPE_MAX_VALUE (type),
                                                  log_vf)));
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for the vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
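/* A worked example (illustrative values only): with vf == 4 the shift
   amount is log_vf == 2, so niters_vector == 2 yields 2 << 2 == 8 scalar
   iterations.  */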
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
                                  or with i_2 if no LCSSA phi is created
                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; it updates the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */
static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */
static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP.
   This function searches for the corresponding lcssa phi node in the exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL_TREE.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto epilog_loop;

     ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         loop_body
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
     iterate after vectorization; see slpeel_make_loop_iterate_ntimes
     for details.
   - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
     should be set to the number of scalar iterations handled by the
     vector loop.  The SSA name is only used on exit from the loop.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

   guard_bb_1:
     if (prefer_scalar_loop) goto merge_bb_1
     else                    goto guard_bb_2

   guard_bb_2:
     if (skip_prolog) goto merge_bb_2
     else             goto prolog_preheader

   prolog_preheader:
   PROLOG:
     prolog_header_bb:
       prolog_body
       if (exit_prolog_cond) goto prolog_exit_bb
       else                  goto prolog_header_bb
     prolog_exit_bb:

   merge_bb_2:

   vector_preheader:
   VECTOR LOOP:
     vector_header_bb:
       vector_body
       if (exit_vector_cond) goto vector_exit_bb
       else                  goto vector_header_bb
     vector_exit_bb:

   guard_bb_3:
     if (skip_epilog) goto merge_bb_3
     else             goto epilog_preheader

   merge_bb_1:

   epilog_preheader:
   EPILOG:
     epilog_header_bb:
       epilog_body
       if (exit_epilog_cond) goto merge_bb_3
       else                  goto epilog_header_bb

   merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns the created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
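/* A worked example, with illustrative values not taken from the comment
   above: with VF == 4 and NITERS == 23, if the prolog peels 1 iteration
   to reach an aligned address, the vector loop covers 20 of the remaining
   22 iterations in 5 vector iterations and the epilog runs the last 2.  */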
1783 struct loop *
1784 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1785 tree *niters_vector, tree *step_vector,
1786 tree *niters_vector_mult_vf_var, int th,
1787 bool check_profitability, bool niters_no_overflow)
1789 edge e, guard_e;
1790 tree type = TREE_TYPE (niters), guard_cond;
1791 basic_block guard_bb, guard_to;
1792 profile_probability prob_prolog, prob_vector, prob_epilog;
1793 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1794 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1795 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1796 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1797 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1799 if (!prolog_peeling && !epilog_peeling)
1800 return NULL;
1802 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
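/* The prolog/epilog body probability is (VF - 1) / VF; with VF == 2
   that would be only 1/2, so presumably the factor is bumped to 3
   below to get 2/3 instead.  */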
1803 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1804 vf = 3;
1805 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1806 .apply_scale (vf - 1, vf);
1807 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1809 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1810 struct loop *first_loop = loop;
1811 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1812 create_lcssa_for_virtual_phi (loop);
1813 update_ssa (TODO_update_ssa_only_virtuals);
1815 if (MAY_HAVE_DEBUG_BIND_STMTS)
1817 gcc_assert (!adjust_vec.exists ());
1818 adjust_vec.create (32);
1820 initialize_original_copy_tables ();
1822 /* Prolog loop may be skipped. */
1823 bool skip_prolog = (prolog_peeling != 0);
1824 /* Skip to epilog if the scalar loop may be preferred. This is only
1825 needed when we peel for the epilog loop and when the condition
1826 hasn't already been checked by loop versioning. */
1827 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1828 && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1829 /* Epilog loop must be executed if the number of iterations for epilog
1830 loop is known at compile time; otherwise we need to add a check at
1831 the end of vector loop and skip to the end of epilog loop. */
1832 bool skip_epilog = (prolog_peeling < 0
1833 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1834 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1835 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1836 skip_epilog = false;
1838 /* Record the anchor bb at which the guard should be placed if the
1839 scalar loop may be preferred. */
1840 basic_block anchor = loop_preheader_edge (loop)->src;
1841 if (skip_vector)
1843 split_edge (loop_preheader_edge (loop));
1845 /* Due to the order in which we peel prolog and epilog, we first
1846 propagate probability to the whole loop. The purpose is to
1847 avoid adjusting probabilities of both prolog and vector loops
1848 separately. Note in this case, the probability of epilog loop
1849 needs to be scaled back later. */
1850 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1851 if (prob_vector.initialized_p ())
1852 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1853 scale_loop_profile (loop, prob_vector, bound);
1856 tree niters_prolog = build_int_cst (type, 0);
1857 source_location loop_loc = find_loop_location (loop);
1858 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1859 if (prolog_peeling)
1861 e = loop_preheader_edge (loop);
1862 if (!slpeel_can_duplicate_loop_p (loop, e))
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1865 "loop can't be duplicated to preheader edge.\n");
1866 gcc_unreachable ();
1868 /* Peel prolog and put it on preheader edge of loop. */
1869 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1870 if (!prolog)
1872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1873 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1874 gcc_unreachable ();
1876 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1877 first_loop = prolog;
1878 reset_original_copy_tables ();
1880 /* Generate and update the number of iterations for prolog loop. */
1881 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1882 &bound_prolog);
1883 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
1884 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog, step_prolog,
1885 NULL_TREE, false);
1887 /* Skip the prolog loop. */
1888 if (skip_prolog)
1890 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1891 niters_prolog, build_int_cst (type, 0));
1892 guard_bb = loop_preheader_edge (prolog)->src;
1893 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1894 guard_to = split_edge (loop_preheader_edge (loop));
1895 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1896 guard_to, guard_bb,
1897 prob_prolog.invert (),
1898 irred_flag);
1899 e = EDGE_PRED (guard_to, 0);
1900 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1901 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1903 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1904 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1906 /* Update init address of DRs. */
1907 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1908 /* Update niters for vector loop. */
1909 LOOP_VINFO_NITERS (loop_vinfo)
1910 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1911 LOOP_VINFO_NITERSM1 (loop_vinfo)
1912 = fold_build2 (MINUS_EXPR, type,
1913 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1914 bool new_var_p = false;
1915 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1916 /* It's guaranteed that the vector loop bound before vectorization is at
1917 least VF, so set range information for the newly generated var. */
1918 if (new_var_p)
1919 set_range_info (niters, VR_RANGE,
1920 wi::to_wide (build_int_cst (type, vf)),
1921 wi::to_wide (TYPE_MAX_VALUE (type)));
1923 /* Prolog iterates at most bound_prolog times; its latch iterates at
1924 most bound_prolog - 1 times. */
1925 record_niter_bound (prolog, bound_prolog - 1, false, true);
1926 delete_update_ssa ();
1927 adjust_vec_debug_stmts ();
1928 scev_reset ();
1931 if (epilog_peeling)
1933 e = single_exit (loop);
1934 if (!slpeel_can_duplicate_loop_p (loop, e))
1936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1937 "loop can't be duplicated to exit edge.\n");
1938 gcc_unreachable ();
1940 /* Peel epilog and put it on exit edge of loop. */
1941 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1942 if (!epilog)
1944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1945 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1946 gcc_unreachable ();
1948 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1950 /* The scalar version of the loop may be preferred. In this case, add
1951 the guard and skip to epilog. Note this only happens when the number
1952 of iterations of the loop is unknown at compile time; otherwise the
1953 loop wouldn't have been vectorized. */
1954 if (skip_vector)
1956 /* An additional epilogue iteration is peeled if a gap exists. */
1957 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1958 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1959 bound_prolog,
1960 peel_for_gaps ? vf : vf - 1,
1961 th, &bound_scalar,
1962 check_profitability);
1963 /* Build guard against NITERSM1 since NITERS may overflow. */
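/* (NITERSM1 cannot wrap even when NITERS, i.e. NITERSM1 + 1, would
   wrap to zero for a loop iterating over the full range of its type,
   hence the comparison below is done on NITERSM1.) */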
1964 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1965 guard_bb = anchor;
1966 guard_to = split_edge (loop_preheader_edge (epilog));
1967 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1968 guard_to, guard_bb,
1969 prob_vector.invert (),
1970 irred_flag);
1971 e = EDGE_PRED (guard_to, 0);
1972 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1973 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1975 /* Simply propagate profile info from guard_bb to guard_to, which is
1976 a merge point of control flow. */
1977 guard_to->count = guard_bb->count;
1979 /* Scale probability of epilog loop back.
1980 FIXME: We should avoid scaling down and back up. Profile may
1981 get lost if we scale down to 0. */
1982 basic_block *bbs = get_loop_body (epilog);
1983 for (unsigned int i = 0; i < epilog->num_nodes; i++)
1984 bbs[i]->count = bbs[i]->count.apply_scale
1985 (bbs[i]->count,
1986 bbs[i]->count.apply_probability
1987 (prob_vector));
1988 free (bbs);
1991 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1992 tree niters_vector_mult_vf;
1993 /* If the loop is peeled a non-zero constant number of times, niters now
1994 refers to orig_niters - prolog_peeling; it won't overflow even if
1995 orig_niters overflows. */
1996 niters_no_overflow |= (prolog_peeling > 0);
1997 vect_gen_vector_loop_niters (loop_vinfo, niters,
1998 niters_vector, step_vector,
1999 niters_no_overflow);
2000 if (!integer_onep (*step_vector))
2002 /* On exit from the loop we will have an easy way of calculating
2003 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2004 until then. */
2005 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2006 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2007 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2009 else
2010 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2011 &niters_vector_mult_vf);
2012 /* Update IVs of original loop as if they were advanced by
2013 niters_vector_mult_vf steps. */
2014 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2015 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2016 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2017 update_e);
2019 if (skip_epilog)
2021 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2022 niters, niters_vector_mult_vf);
2023 guard_bb = single_exit (loop)->dest;
2024 guard_to = split_edge (single_exit (epilog));
2025 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2026 skip_vector ? anchor : guard_bb,
2027 prob_epilog.invert (),
2028 irred_flag);
2029 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2030 single_exit (epilog));
2031 /* Only need to handle basic block before epilog loop if it's not
2032 the guard_bb, which is the case when skip_vector is true. */
2033 if (guard_bb != bb_before_epilog)
2035 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
2037 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
2039 scale_loop_profile (epilog, prob_epilog, bound);
2041 else
2042 slpeel_update_phi_nodes_for_lcssa (epilog);
2044 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
2045 /* We share epilog loop with scalar version loop. */
2046 bound = MAX (bound, bound_scalar - 1);
2047 record_niter_bound (epilog, bound, false, true);
2049 delete_update_ssa ();
2050 adjust_vec_debug_stmts ();
2051 scev_reset ();
2053 adjust_vec.release ();
2054 free_original_copy_tables ();
2056 return epilog;
2059 /* Function vect_create_cond_for_niters_checks.
2061 Create a conditional expression that represents the run-time checks for
2062 the loop's niter. The loop is guaranteed to terminate if the run-time
2063 checks hold.
2065 Input:
2066 COND_EXPR - input conditional expression. The new condition will be
2067 chained with it by a logical AND operation. If it is NULL,
2068 the new condition is used on its own.
2069 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
2070 assumptions to be checked.
2072 Output:
2073 COND_EXPR - conditional expression.
2075 The returned COND_EXPR is the conditional expression to be used in the
2076 if statement that controls which version of the loop gets executed at
2077 runtime. */
2079 static void
2080 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2082 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2084 if (*cond_expr)
2085 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2086 *cond_expr, part_cond_expr);
2087 else
2088 *cond_expr = part_cond_expr;
2091 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2092 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2094 static void
2095 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2097 if (*cond_expr)
2098 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2099 *cond_expr, part_cond_expr);
2100 else
2101 *cond_expr = part_cond_expr;
2104 /* Function vect_create_cond_for_align_checks.
2106 Create a conditional expression that represents the alignment checks
2107 for all data references (array element references) whose alignment
2108 must be checked at runtime.
2110 Input:
2111 COND_EXPR - input conditional expression. New conditions will be chained
2112 with logical AND operation.
2113 LOOP_VINFO - two fields of the loop information are used.
2114 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2115 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2117 Output:
2118 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2119 expression.
2120 The returned value is the conditional expression to be used in the if
2121 statement that controls which version of the loop gets executed at runtime.
2123 The algorithm makes two assumptions:
2124 1) The number of bytes "n" in a vector is a power of 2.
2125 2) An address "a" is aligned if a%n is zero, and this
2126 test can be done as a&(n-1) == 0. For example, for 16
2127 byte vectors the test is a&0xf == 0. */
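/* For instance, with 16-byte vectors (mask = 0xf) and two data
   references whose first-vector addresses are A and B, the emitted
   test is equivalent to this sketch (names illustrative only):

     or_tmp = (intptr_t) A | (intptr_t) B;
     ok = (or_tmp & 0xf) == 0;

   OR-ing the addresses first allows all of them to be tested with a
   single AND against the mask.  */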
2129 static void
2130 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2131 tree *cond_expr,
2132 gimple_seq *cond_expr_stmt_list)
2134 vec<gimple *> may_misalign_stmts
2135 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2136 gimple *ref_stmt;
2137 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2138 tree mask_cst;
2139 unsigned int i;
2140 tree int_ptrsize_type;
2141 char tmp_name[20];
2142 tree or_tmp_name = NULL_TREE;
2143 tree and_tmp_name;
2144 gimple *and_stmt;
2145 tree ptrsize_zero;
2146 tree part_cond_expr;
2148 /* Check that mask is one less than a power of 2, i.e., mask is
2149 all zeros followed by all ones. */
2150 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2152 int_ptrsize_type = signed_type_for (ptr_type_node);
2154 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2155 of the first vector of the i'th data reference. */
2157 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2159 gimple_seq new_stmt_list = NULL;
2160 tree addr_base;
2161 tree addr_tmp_name;
2162 tree new_or_tmp_name;
2163 gimple *addr_stmt, *or_stmt;
2164 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2165 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2166 bool negative = tree_int_cst_compare
2167 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2168 tree offset = negative
2169 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2171 /* create: addr_tmp = (int)(address_of_first_vector) */
2172 addr_base =
2173 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2174 offset);
2175 if (new_stmt_list != NULL)
2176 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2178 sprintf (tmp_name, "addr2int%d", i);
2179 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2180 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2181 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2183 /* The addresses are ORed together. */
2185 if (or_tmp_name != NULL_TREE)
2187 /* create: or_tmp = or_tmp | addr_tmp */
2188 sprintf (tmp_name, "orptrs%d", i);
2189 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2190 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2191 or_tmp_name, addr_tmp_name);
2192 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2193 or_tmp_name = new_or_tmp_name;
2195 else
2196 or_tmp_name = addr_tmp_name;
2198 } /* end for i */
2200 mask_cst = build_int_cst (int_ptrsize_type, mask);
2202 /* create: and_tmp = or_tmp & mask */
2203 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2205 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2206 or_tmp_name, mask_cst);
2207 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2209 /* Make and_tmp the left operand of the conditional test against zero.
2210 If and_tmp has a nonzero bit, then some address is unaligned. */
2211 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2212 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2213 and_tmp_name, ptrsize_zero);
2214 chain_cond_expr (cond_expr, part_cond_expr);
2217 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2218 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2219 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2220 and this new condition are true. Treat a null *COND_EXPR as "true". */
2222 static void
2223 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2225 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2226 unsigned int i;
2227 vec_object_pair *pair;
2228 FOR_EACH_VEC_ELT (pairs, i, pair)
2230 tree addr1 = build_fold_addr_expr (pair->first);
2231 tree addr2 = build_fold_addr_expr (pair->second);
2232 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2233 addr1, addr2);
2234 chain_cond_expr (cond_expr, part_cond_expr);
2238 /* Function vect_create_cond_for_alias_checks.
2240 Create a conditional expression that represents the run-time checks for
2241 overlapping of address ranges represented by a list of data reference
2242 relations passed as input.
2244 Input:
2245 COND_EXPR - input conditional expression. The new conditions will be
2246 chained with it by a logical AND operation. If it is NULL,
2247 the new conditions are used on their own.
2248 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2249 to be checked.
2251 Output:
2252 COND_EXPR - conditional expression.
2254 The returned COND_EXPR is the conditional expression to be used in the if
2255 statement that controls which version of the loop gets executed at runtime. */
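/* For two data references covering segments that start at addresses A
   and B and span LEN_A and LEN_B bytes over the whole loop, the check
   built by create_runtime_alias_checks is roughly of the form (sketch
   only; the real expression also accounts for access direction and
   sizes):

     no_alias = (A + LEN_A <= B) || (B + LEN_B <= A);

   i.e. the two segments do not overlap.  */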
2258 void
2259 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2261 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2262 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2264 if (comp_alias_ddrs.is_empty ())
2265 return;
2267 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2268 &comp_alias_ddrs, cond_expr);
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_NOTE, vect_location,
2271 "created %u versioning for alias checks.\n",
2272 comp_alias_ddrs.length ());
2276 /* Function vect_loop_versioning.
2278 If the loop has data references that may or may not be aligned and/or
2279 has data reference relations whose independence was not proven, then
2280 two versions of the loop need to be generated, one which is vectorized
2281 and one which isn't. A test is then generated to control which of the
2282 loops is executed. The test checks for the alignment of all of the
2283 data references that may or may not be aligned. An additional
2284 sequence of runtime tests is generated for each pair of DDRs whose
2285 independence was not proven. The vectorized version of the loop is
2286 executed only if both the alias and alignment tests pass.
2288 The test generated to check which version of the loop is executed
2289 is modified to also check for profitability as indicated by the
2290 cost model threshold TH.
2292 The versioning precondition(s) are placed in *COND_EXPR and
2293 *COND_EXPR_STMT_LIST. */
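/* At the source level the transformation is roughly (sketch only,
   assuming all three kinds of checks are needed):

     if (niters >= TH && all_aligned && no_alias)
       VECTORIZED LOOP
     else
       SCALAR LOOP
 */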
2295 void
2296 vect_loop_versioning (loop_vec_info loop_vinfo,
2297 unsigned int th, bool check_profitability,
2298 poly_uint64 versioning_threshold)
2300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2301 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2302 basic_block condition_bb;
2303 gphi_iterator gsi;
2304 gimple_stmt_iterator cond_exp_gsi;
2305 basic_block merge_bb;
2306 basic_block new_exit_bb;
2307 edge new_exit_e, e;
2308 gphi *orig_phi, *new_phi;
2309 tree cond_expr = NULL_TREE;
2310 gimple_seq cond_expr_stmt_list = NULL;
2311 tree arg;
2312 profile_probability prob = profile_probability::likely ();
2313 gimple_seq gimplify_stmt_list = NULL;
2314 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2315 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2316 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2317 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2319 if (check_profitability)
2320 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2321 build_int_cst (TREE_TYPE (scalar_loop_iters),
2322 th - 1));
2323 if (maybe_ne (versioning_threshold, 0U))
2325 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2326 build_int_cst (TREE_TYPE (scalar_loop_iters),
2327 versioning_threshold - 1));
2328 if (cond_expr)
2329 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
2330 expr, cond_expr);
2331 else
2332 cond_expr = expr;
2335 if (version_niter)
2336 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2338 if (cond_expr)
2339 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2340 is_gimple_condexpr, NULL_TREE);
2342 if (version_align)
2343 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2344 &cond_expr_stmt_list);
2346 if (version_alias)
2348 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2349 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2352 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2353 is_gimple_condexpr, NULL_TREE);
2354 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2356 initialize_original_copy_tables ();
2357 if (scalar_loop)
2359 edge scalar_e;
2360 basic_block preheader, scalar_preheader;
2362 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2363 scale LOOP's frequencies instead. */
2364 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2365 prob, prob.invert (), prob, prob.invert (), true);
2366 scale_loop_frequencies (loop, prob);
2367 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2368 while we need to move it above LOOP's preheader. */
2369 e = loop_preheader_edge (loop);
2370 scalar_e = loop_preheader_edge (scalar_loop);
2371 gcc_assert (empty_block_p (e->src)
2372 && single_pred_p (e->src));
2373 gcc_assert (empty_block_p (scalar_e->src)
2374 && single_pred_p (scalar_e->src));
2375 gcc_assert (single_pred_p (condition_bb));
2376 preheader = e->src;
2377 scalar_preheader = scalar_e->src;
2378 scalar_e = find_edge (condition_bb, scalar_preheader);
2379 e = single_pred_edge (preheader);
2380 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2381 scalar_preheader);
2382 redirect_edge_and_branch_force (scalar_e, preheader);
2383 redirect_edge_and_branch_force (e, condition_bb);
2384 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2385 single_pred (condition_bb));
2386 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2387 single_pred (scalar_preheader));
2388 set_immediate_dominator (CDI_DOMINATORS, preheader,
2389 condition_bb);
2391 else
2392 nloop = loop_version (loop, cond_expr, &condition_bb,
2393 prob, prob.invert (), prob, prob.invert (), true);
2395 if (version_niter)
2397 /* The versioned loop could be infinite, so we need to clear existing
2398 niter information, which was copied from the original loop. */
2399 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2400 vect_free_loop_info_assumptions (nloop);
2401 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2402 loop_constraint_set (loop, LOOP_C_INFINITE);
2405 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2406 && dump_enabled_p ())
2408 if (version_alias)
2409 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2410 "loop versioned for vectorization because of "
2411 "possible aliasing\n");
2412 if (version_align)
2413 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414 "loop versioned for vectorization to enhance "
2415 "alignment\n");
2418 free_original_copy_tables ();
2420 /* Loop versioning violates an assumption we try to maintain during
2421 vectorization - that the loop exit block has a single predecessor.
2422 After versioning, the exit block of both loop versions is the same
2423 basic block (i.e. it has two predecessors). To simplify the
2424 following transformations in the vectorizer, we fix this situation
2425 here by adding a new (empty) block on the exit-edge of the loop,
2426 with the proper loop-exit phis to maintain loop-closed-form.
2427 If loop versioning wasn't done from loop, but scalar_loop instead,
2428 merge_bb will already have just a single predecessor. */
2430 merge_bb = single_exit (loop)->dest;
2431 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2433 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2434 new_exit_bb = split_edge (single_exit (loop));
2435 new_exit_e = single_exit (loop);
2436 e = EDGE_SUCC (new_exit_bb, 0);
2438 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2440 tree new_res;
2441 orig_phi = gsi.phi ();
2442 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2443 new_phi = create_phi_node (new_res, new_exit_bb);
2444 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2445 add_phi_arg (new_phi, arg, new_exit_e,
2446 gimple_phi_arg_location_from_edge (orig_phi, e));
2447 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2451 /* End loop-exit-fixes after versioning. */
2453 if (cond_expr_stmt_list)
2455 cond_exp_gsi = gsi_last_bb (condition_bb);
2456 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2457 GSI_SAME_STMT);
2459 update_ssa (TODO_update_ssa);