/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
	{
	  if (!rename_from_outer_loop)
	    continue;
	  if (e->src != outer_loop->header)
	    {
	      if (outer_loop->inner->next)
		{
		  /* If outer_loop has 2 inner loops, allow there to
		     be an extra basic block which decides which of the
		     two loops to use using LOOP_VECTORIZED.  */
		  if (!single_pred_p (e->src)
		      || single_pred (e->src) != outer_loop->header)
		    continue;
		}
	    }
	}
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
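
/* A hypothetical illustration of the LIFO requirement (the names x_1, x_5
   and x_9 are made up for this sketch and do not come from the code below):
   if x_1 is first scheduled to be rewritten to x_5 relative to BB5 and later
   to x_9 relative to BB9, popping the stack LIFO applies the x_9 substitution
   first, so debug uses dominated by BB9 are never left pointing at the
   soon-to-be non-dominant x_5.  */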
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    gimple_debug_bind_reset_value (stmt);
	}
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
	adjust_vec.safe_push (ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}
/* Make LOOP iterate N == (NITERS - STEP) / STEP + 1 times,
   where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP
   times when NITERS is nonzero and (1 << M) / STEP times otherwise,
   where M is the precision of NITERS.

   NITERS_MAYBE_ZERO is true if NITERS can be zero, false if it is known
   to be >= STEP.  In the latter case N is always NITERS / STEP.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
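
/* A worked example of the scheme above (illustrative values only): with
   NITERS = 10 and STEP = 4, N = (10 - 4) / 4 + 1 = 2, which matches
   NITERS / STEP = 10 / 4 = 2 under truncating division; if FINAL_IV is
   supplied it ends up holding N * STEP = 8 on exit from the loop.  */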
static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters, tree step,
				 tree final_iv, bool niters_maybe_zero)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  source_location loop_loc;
  tree init, limit;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

	 A:
	   x = 0;
	   do
	     ...
	   while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

	 B:
	   x = 0;
	   do
	     ...
	   while (x <= NITERS - STEP);

	 so that the loop continues to iterate if x + STEP - 1 < NITERS
	 but stops if x + STEP - 1 >= NITERS.

	 However, if NITERS is zero, x never hits a value above NITERS - STEP
	 before wrapping around.  There are two obvious ways of dealing with
	 this:

	 - start at STEP - 1 and compare x before incrementing it
	 - start at -1 and compare x after incrementing it

	 The latter is simpler and is what we use.  The loop in this case
	 looks like:

	 C:
	   x = -1;
	   do
	     ...
	   while (x < NITERS - STEP);

	 In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
	{
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      if (niters_maybe_zero)
	{
	  /* Case C.  */
	  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
	  init = build_all_ones_cst (niters_type);
	}
      else
	{
	  /* Case B.  */
	  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
	  init = build_zero_cst (niters_type);
	}
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
				    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
				 NULL_TREE);
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
				       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
				       limit, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
					     indx_after_incr, init);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that TO->dest PHI arguments have current_def
   to that in from.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
	{
	  gsi_next (&gsi_from);
	  continue;
	}
      if (virtual_operand_p (to_arg))
	{
	  gsi_next (&gsi_to);
	  continue;
	}
      if (TREE_CODE (from_arg) != SSA_NAME)
	gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
	{
	  if (get_current_def (to_arg) == NULL_TREE)
	    set_current_def (to_arg, get_current_def (from_arg));
	}
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi && to_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
		     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */
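
/* A rough sketch of the two placements (illustration only, not generated
   output): placing the copy on the exit edge gives
     preheader -> LOOP -> copy -> old exit destination
   while placing it on the preheader edge gives
     preheader -> copy -> LOOP -> exit,
   with one of the temporary forwarder blocks kept as the pre-header of the
   second loop in the chain.  */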
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)

  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  basic_block exit_dest;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))

  if (scalar_loop == NULL)

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);

  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						single_exit (loop));
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch, 0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */

      if (scalar_loop != loop)
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))

	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);

      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);

  else /* Add the copy at entry.  */

      if (scalar_loop != loop)
	  /* Remove the non-necessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  checking_verify_dominators (CDI_DOMINATORS);
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       profile_probability probability, bool irreducible_p)

  gimple_stmt_iterator gsi;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
				 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.  */
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)

  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies the checking what
   uses should be renamed.  */
create_lcssa_for_virtual_phi (struct loop *loop)

  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))

	gphi *phi = gsi.phi ();
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))

	tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	imm_use_iterator imm_iter;

	SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
	  = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
	add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	gimple_phi_set_result (new_phi, new_vop);
	FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	  if (stmt != new_phi
	      && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_vop);
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and NULL if not.  */

find_loop_location (struct loop *loop)

  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
	return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
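
/* For instance (hypothetical GIMPLE, for illustration only), an IV such as

     i_1 = PHI <0(preheader), i_2(latch)>
     i_2 = i_1 + 4;

   has evolution part 4, which is loop-invariant and not itself a chrec, so
   it passes the checks below; an IV whose step varied inside the loop would
   be rejected.  */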
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)

  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

	 Skip reduction phis.  */

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");

      /* Analyze the evolution function.  */

      evolution_part
	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n");
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n");
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n");
	}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */
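
/* A hypothetical illustration: for an IV with initial value init_expr and a
   constant step of 4, the new definition emitted in the exit block below is
   simply init_expr + 4 * NITERS, and the PHI argument on UPDATE_E is
   redirected to that name.  */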
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e)

  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))

      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip reduction and virtual phis.  */

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi. skip.\n");

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
	 of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
	gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
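
/* A worked example under assumed numbers (not taken from any real target):
   for a vector of 4 ints with target_align = 16 bytes and elem_size = 4, a
   start address that is misaligned by 8 bytes gives misalign_in_elems =
   8 / 4 = 2 and align_in_elems = 16 / 4 = 4, so the prolog executes
   (4 - 2) & (4 - 1) = 2 iterations, after which DR is 16-byte aligned.  */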
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
			     basic_block bb, int *bound)

  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }

      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
							       &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
	= int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
	= fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
	= fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
				 & (align_in_elems - 1)).  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
			     align_in_elems_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
			     misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  gimple_seq_add_seq (&stmts, new_stmts);

      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
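
/* For example (illustrative numbers only): if NITERS = 3 iterations were
   peeled and DR_STEP is 4 bytes, the code below adds 3 * 4 = 12 bytes to
   DR_OFFSET, so DR now describes the first access of the remaining
   iterations.  */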
vect_update_init_of_dr (struct data_reference *dr, tree niters)

  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
			fold_convert (sizetype, niters),
			fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
			fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)

  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);

      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if new ssa_var is generated.  */
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)

  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)

      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);

      gsi_insert_seq_on_edge_immediate (pe, stmts);
      if (new_var_p != NULL)
/* Calculate the number of iterations above which vectorized loop will be
   preferred than scalar loop.  NITERS_PROLOG is the number of iterations
   of prolog loop.  If it's integer const, the integer number is also passed
   in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
   number of iterations of prolog loop.  VFM1 is vector factor minus one.
   If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
   (rather than vectorized) loop will be executed.  This function stores
   upper bound (included) of the result in BOUND_SCALAR.  */
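
/* A small illustration with assumed values: with VFM1 = 7 (so VF = 8),
   NITERS_PROLOG = 3 and BOUND_PROLOG = 7, niters below is computed as
   3 + 7 = 10 and *BOUND_SCALAR starts out as 7 + 7 = 14 when no
   profitability check is requested.  */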
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
			     int bound_prolog, poly_int64 vfm1, int th,
			     poly_uint64 *bound_scalar,
			     bool check_profitability)

  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
			     build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
	 compute the maximum niters of scalar loop.  */

      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
	{
	  *bound_scalar = upper_bound (int_niters_prolog + vfm1, th);
	  return build_int_cst (type, *bound_scalar);
	}
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
	 bound (included) of niters of prolog loop.  */
      if (known_ge (th, vfm1 + bound_prolog))
	{
	  return build_int_cst (type, th);
	}
      /* Need to do runtime comparison.  */
      else if (maybe_gt (th, vfm1))
	{
	  *bound_scalar = upper_bound (*bound_scalar, th);
	  return fold_build2 (MAX_EXPR, type,
			      build_int_cst (type, th), niters);
	}
    }
/* NITERS is the number of times that the original scalar loop executes
   after peeling.  Work out the maximum number of iterations N that can
   be handled by the vectorized form of the loop and then either:

   a) set *STEP_VECTOR_PTR to the vectorization factor and generate:

	niters_vector = N

   b) set *STEP_VECTOR_PTR to one and generate:

	niters_vector = N / vf

   In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
   any new statements on the loop preheader edge.  NITERS_NO_OVERFLOW
   is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero).  */
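
/* A worked example with assumed values: for NITERS = 17 and a constant
   VF = 8, case (b) computes niters_vector = 17 >> 3 = 2 when NITERS is known
   not to overflow, and ((17 - 8) >> 3) + 1 = 2 otherwise; the vector loop
   then covers 16 scalar iterations and one is left for the epilogue.  */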
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
			     tree *niters_vector_ptr, tree *step_vector_ptr,
			     bool niters_no_overflow)

  tree ni_minus_gap, var;
  tree niters_vector, step_vector, type = TREE_TYPE (niters);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = NULL_TREE;

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
				  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
	{
	  var = create_tmp_var (type, "ni_gap");
	  gimple *stmts = NULL;
	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
					       true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap = niters;

  unsigned HOST_WIDE_INT const_vf;
  if (vf.is_constant (&const_vf))
    {
      /* Create: niters >> log2(vf) */
      /* If it's known that niters == number of latch executions + 1 doesn't
	 overflow, we can generate niters >> log2(vf); otherwise we generate
	 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
	 will be at least one.  */
      log_vf = build_int_cst (type, exact_log2 (const_vf));
      if (niters_no_overflow)
	niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
      else
	niters_vector
	  = fold_build2 (PLUS_EXPR, type,
			 fold_build2 (RSHIFT_EXPR, type,
				      fold_build2 (MINUS_EXPR, type,
						   ni_minus_gap,
						   build_int_cst (type, vf)),
				      log_vf),
			 build_int_cst (type, 1));
      step_vector = build_one_cst (type);
    }
  else
    {
      niters_vector = ni_minus_gap;
      step_vector = build_int_cst (type, vf);
    }

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
	 we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL && log_vf)
	set_range_info (niters_vector, VR_RANGE,
			wi::to_wide (build_int_cst (type, 1)),
			wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
						  TYPE_MAX_VALUE (type),
						  log_vf)));
    }
  *niters_vector_ptr = niters_vector;
  *step_vector_ptr = step_vector;
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
				     tree niters_vector,
				     tree *niters_vector_mult_vf_ptr)

  /* We should be using a step_vector of VF if VF is variable.  */
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
					    niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
						    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       i_1 = PHI<i_0, i_2>;
       ...
     ;; i_x = PHI<i_2>;         ;; LCSSA phi node to be created for FIRST,
       ...
       i_3 = PHI<i_0, i_4>;     ;; Use of i_0 to be replaced with i_x,
				   or with i_2 if no LCSSA phi is created
				   under condition of CREATE_LCSSA_FOR_IV_PHIS.

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing argument on incoming edge with the
   result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
   is false, Loop closed ssa phis will only be created for non-iv phis for
   the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute rest iterations of the first.  */
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
				   struct loop *first, struct loop *second,
				   bool create_lcssa_for_iv_phis)

  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))

      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
	{
	  tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  gphi *lcssa_phi = create_phi_node (new_res, between_bb);
	  add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
	}

      /* Update PHI node in the second loop by replacing arg on the loop's
	 incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   look like:

       i_1 = PHI<i_0, i_2>;
       ...
     ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
       ...
       i_3 = PHI<i_5, i_4>;     ;; Use of i_5 to be replaced with i_x.

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of new PHI result.  */
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
				    struct loop *update_loop,
				    edge guard_edge, edge merge_edge)

  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
	gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))

      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
	 args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
		gphi *lcssa_phi)

  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
			   PHI_ARG_DEF (lcssa_phi, 0), 0))
	return PHI_RESULT (phi);
    }
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.

       i_1 = PHI<i_0, i_2>;
       ...
       i_3 = PHI<i_2, i_4>;
       ...
     ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
       ...
       i_x = PHI<i_4>;          ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used out side EPILOG (i.e - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
				    edge guard_edge, edge merge_edge)

  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
	 loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)

      tree merge_arg = get_current_def (old_arg);
	merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
	 use the old arg.  */
	guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
	 the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)

  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       loop:
	 ...
	 if (exit_loop_cond) goto exit_bb
	 else                goto loop

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
     iterate after vectorization; see slpeel_make_loop_iterate_ntimes
     for details.
   - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
     should be set to the number of scalar iterations handled by the
     vector loop.  The SSA name is only used on exit from the loop.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   looks like:

     guard_bb_1:
       if (prefer_scalar_loop) goto merge_bb_1
       else                    goto guard_bb_2

     guard_bb_2:
       if (skip_prolog) goto merge_bb_2
       else             goto prolog_preheader

     prolog_loop:
       ...
       if (exit_prolog_cond) goto prolog_exit_bb
       else                  goto prolog_header_bb

     vector_loop:
       ...
       if (exit_vector_cond) goto vector_exit_bb
       else                  goto vector_header_bb

     guard_bb_3:
       if (skip_epilog) goto merge_bb_3
       else             goto epilog_preheader

     epilog_loop:
       ...
       if (exit_epilog_cond) goto merge_bb_3
       else                  goto epilog_header_bb

   Note this function peels prolog and epilog only if it's necessary,
   as well as guards.
   Returns created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
1789 vect_do_peeling (loop_vec_info loop_vinfo
, tree niters
, tree nitersm1
,
1790 tree
*niters_vector
, tree
*step_vector
,
1791 tree
*niters_vector_mult_vf_var
, int th
,
1792 bool check_profitability
, bool niters_no_overflow
)
1795 tree type
= TREE_TYPE (niters
), guard_cond
;
1796 basic_block guard_bb
, guard_to
;
1797 profile_probability prob_prolog
, prob_vector
, prob_epilog
;
1798 int bound_prolog
= 0;
1799 poly_uint64 bound_scalar
= 0;
1801 int prolog_peeling
= LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
);
1802 bool epilog_peeling
= (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo
)
1803 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
));
1805 if (!prolog_peeling
&& !epilog_peeling
)
1808 prob_vector
= profile_probability::guessed_always ().apply_scale (9, 10);
1809 estimated_vf
= vect_vf_for_cost (loop_vinfo
);
1810 if (estimated_vf
== 2)
1812 prob_prolog
= prob_epilog
= profile_probability::guessed_always ()
1813 .apply_scale (estimated_vf
- 1, estimated_vf
);
1814 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1816 struct loop
*prolog
, *epilog
= NULL
, *loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1817 struct loop
*first_loop
= loop
;
1818 bool irred_flag
= loop_preheader_edge (loop
)->flags
& EDGE_IRREDUCIBLE_LOOP
;
1819 create_lcssa_for_virtual_phi (loop
);
1820 update_ssa (TODO_update_ssa_only_virtuals
);
1822 if (MAY_HAVE_DEBUG_BIND_STMTS
)
1824 gcc_assert (!adjust_vec
.exists ());
1825 adjust_vec
.create (32);
1827 initialize_original_copy_tables ();
1829 /* Prolog loop may be skipped. */
1830 bool skip_prolog
= (prolog_peeling
!= 0);
1831 /* Skip to epilog if scalar loop may be preferred. It's only needed
1832 when we peel for epilog loop and when it hasn't been checked with
1834 bool skip_vector
= ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1835 && !LOOP_REQUIRES_VERSIONING (loop_vinfo
))
1836 || !vf
.is_constant ());
1837 /* Epilog loop must be executed if the number of iterations for epilog
1838 loop is known at compile time, otherwise we need to add a check at
1839 the end of vector loop and skip to the end of epilog loop. */
1840 bool skip_epilog
= (prolog_peeling
< 0
1841 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
)
1842 || !vf
.is_constant ());
1843 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1844 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
))
1845 skip_epilog
= false;
1847 /* Record the anchor bb at which guard should be placed if scalar loop
1848 may be preferred. */
1849 basic_block anchor
= loop_preheader_edge (loop
)->src
;
1852 split_edge (loop_preheader_edge (loop
));
1854 /* Due to the order in which we peel prolog and epilog, we first
1855 propagate probability to the whole loop. The purpose is to
1856 avoid adjusting probabilities of both prolog and vector loops
1857 separately. Note in this case, the probability of epilog loop
1858 needs to be scaled back later. */
1859 basic_block bb_before_loop
= loop_preheader_edge (loop
)->src
;
1860 if (prob_vector
.initialized_p ())
1862 scale_bbs_frequencies (&bb_before_loop
, 1, prob_vector
);
1863 scale_loop_profile (loop
, prob_vector
, 0);
1867 tree niters_prolog
= build_int_cst (type
, 0);
1868 source_location loop_loc
= find_loop_location (loop
);
1869 struct loop
*scalar_loop
= LOOP_VINFO_SCALAR_LOOP (loop_vinfo
);
1872 e
= loop_preheader_edge (loop
);
1873 if (!slpeel_can_duplicate_loop_p (loop
, e
))
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1876 "loop can't be duplicated to preheader edge.\n");
1879 /* Peel prolog and put it on preheader edge of loop. */
1880 prolog
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, scalar_loop
, e
);
1883 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, loop_loc
,
1884 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1887 slpeel_update_phi_nodes_for_loops (loop_vinfo
, prolog
, loop
, true);
1888 first_loop
= prolog
;
1889 reset_original_copy_tables ();
1891 /* Generate and update the number of iterations for prolog loop. */
1892 niters_prolog
= vect_gen_prolog_loop_niters (loop_vinfo
, anchor
,
1894 tree step_prolog
= build_one_cst (TREE_TYPE (niters_prolog
));
1895 slpeel_make_loop_iterate_ntimes (prolog
, niters_prolog
, step_prolog
,
1898 /* Skip the prolog loop. */
1901 guard_cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
1902 niters_prolog
, build_int_cst (type
, 0));
1903 guard_bb
= loop_preheader_edge (prolog
)->src
;
1904 basic_block bb_after_prolog
= loop_preheader_edge (loop
)->src
;
1905 guard_to
= split_edge (loop_preheader_edge (loop
));
1906 guard_e
= slpeel_add_loop_guard (guard_bb
, guard_cond
,
1908 prob_prolog
.invert (),
1910 e
= EDGE_PRED (guard_to
, 0);
1911 e
= (e
!= guard_e
? e
: EDGE_PRED (guard_to
, 1));
1912 slpeel_update_phi_nodes_for_guard1 (prolog
, loop
, guard_e
, e
);
1914 scale_bbs_frequencies (&bb_after_prolog
, 1, prob_prolog
);
1915 scale_loop_profile (prolog
, prob_prolog
, bound_prolog
);
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type,
		       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
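      /* Worked example (illustration only): if the original NITERS is 103
	 and the prolog peels niters_prolog = 3 iterations for alignment,
	 the vector loop is left with 103 - 3 = 100 iterations and
	 NITERSM1 becomes 102 - 3 = 99.  */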
      bool new_var_p = false;
      niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
      /* It's guaranteed that vector loop bound before vectorization is at
	 least VF, so set range information for newly generated var.  */
      if (new_var_p)
	set_range_info (niters, VR_RANGE,
			wi::to_wide (build_int_cst (type, vf)),
			wi::to_wide (TYPE_MAX_VALUE (type)));

      /* Prolog iterates at most bound_prolog times, latch iterates at
	 most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }

  if (epilog_peeling)
    {
      e = single_exit (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "loop can't be duplicated to exit edge.\n");
	  gcc_unreachable ();
	}
      /* Peel epilog and put it on exit edge of loop.  */
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!epilog)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
	  gcc_unreachable ();
	}
      slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);

      /* Scalar version loop may be preferred.  In this case, add guard
	 and skip to epilog.  Note this only happens when the number of
	 iterations of loop is unknown at compile time, otherwise this
	 won't be vectorized.  */
      if (skip_vector)
	{
	  /* Additional epilogue iteration is peeled if gap exists.  */
	  bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
	  tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
						bound_prolog,
						peel_for_gaps ? vf : vf - 1,
						th, &bound_scalar,
						check_profitability);
	  /* Build guard against NITERSM1 since NITERS may overflow.  */
	  guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
	  guard_bb = anchor;
	  guard_to = split_edge (loop_preheader_edge (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_vector.invert (),
					   irred_flag);
	  e = EDGE_PRED (guard_to, 0);
	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
	  /* Simply propagate profile info from guard_bb to guard_to which is
	     a merge point of control flow.  */
	  guard_to->count = guard_bb->count;

	  /* Scale probability of epilog loop back.
	     FIXME: We should avoid scaling down and back up.  Profile may
	     get lost if we scale down to 0.  */
	  basic_block *bbs = get_loop_body (epilog);
	  for (unsigned int i = 0; i < epilog->num_nodes; i++)
	    bbs[i]->count = bbs[i]->count.apply_scale
				 (bbs[i]->count,
				  bbs[i]->count.apply_probability
				    (prob_vector));
	  free (bbs);
	}
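      /* Illustration of the intended arithmetic (not original commentary):
	 the loop above effectively divides each epilog block count by
	 prob_vector again, since for a count C

	   C.apply_scale (C, C.apply_probability (prob_vector))
	     = C * C / (C * prob_vector) = C / prob_vector,

	 which undoes the earlier whole-loop scaling for this copy.  */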
      basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
      tree niters_vector_mult_vf;
      /* If loop is peeled for non-zero constant times, now niters refers to
	 orig_niters - prolog_peeling, it won't overflow even if the
	 orig_niters overflows.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
				   niters_vector, step_vector,
				   niters_no_overflow);
      if (!integer_onep (*step_vector))
	{
	  /* On exit from the loop we will have an easy way of calculating
	     NITERS_VECTOR / STEP * STEP.  Install a dummy definition
	     until then.  */
	  niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
	  SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
	  *niters_vector_mult_vf_var = niters_vector_mult_vf;
	}
      else
	vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
					     &niters_vector_mult_vf);
      /* Update IVs of original loop as if they were advanced by
	 niters_vector_mult_vf steps.  */
      gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
					update_e);
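      /* Conceptual example (not from the original code): if a scalar IV
	 starts at 0 and the vector loop covers niters_vector_mult_vf = 96
	 scalar iterations' worth of work, every use of the IV reached via
	 UPDATE_E must see the value the IV would have had after 96 scalar
	 iterations; the call above rewrites the exit PHIs accordingly.  */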
      if (skip_epilog)
	{
	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
				    niters, niters_vector_mult_vf);
	  guard_bb = single_exit (loop)->dest;
	  guard_to = split_edge (single_exit (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
					   skip_vector ? anchor : guard_bb,
					   prob_epilog.invert (),
					   irred_flag);
	  slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
					      single_exit (epilog));
	  /* Only need to handle basic block before epilog loop if it's not
	     the guard_bb, which is the case when skip_vector is true.  */
	  if (guard_bb != bb_before_epilog)
	    {
	      prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();

	      scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
	    }
	  scale_loop_profile (epilog, prob_epilog, 0);
	}
      else
	slpeel_update_phi_nodes_for_lcssa (epilog);
      unsigned HOST_WIDE_INT bound1, bound2;
      if (vf.is_constant (&bound1) && bound_scalar.is_constant (&bound2))
	{
	  bound1 -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 2;
	  if (bound2)
	    /* We share epilog loop with scalar version loop.  */
	    bound1 = MAX (bound1, bound2 - 1);
	  record_niter_bound (epilog, bound1, false, true);
	}

      delete_update_ssa ();
      adjust_vec_debug_stmts ();
    }
  adjust_vec.release ();
  free_original_copy_tables ();

  return epilog;
}
/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
		assumptions to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */

static void
vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);

  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */

static void
chain_cond_expr (tree *cond_expr, tree part_cond_expr)
{
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}
/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment
   must be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
		LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
		LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero, and this test can be
	done as a&(n-1) == 0.  For example, for 16
	byte vectors the test is a&0xf == 0.  */
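/* Worked example (illustration, not part of the original comment): for
   16-byte vectors the mask is 0xf.  Given data-reference addresses
   0x1000 and 0x1008, the generated test computes
   (0x1000 | 0x1008) & 0xf == 0x8, which is nonzero, so the check fails
   and the scalar version runs; with addresses 0x1000 and 0x1010 the OR
   is 0x1010 and 0x1010 & 0xf == 0, so the vectorized version runs.  */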
static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
				   tree *cond_expr,
				   gimple_seq *cond_expr_stmt_list)
{
  vec<gimple *> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple *ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask + 1)) == 0));

  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
					      offset);
      if (new_stmt_list != NULL)
	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */
      if (or_tmp_name != NULL_TREE)
	{
	  /* create: or_tmp = or_tmp | addr_tmp */
	  sprintf (tmp_name, "orptrs%d", i);
	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type,
						NULL, tmp_name);
	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
					 or_tmp_name, addr_tmp_name);
	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
	  or_tmp_name = new_or_tmp_name;
	}
      else
	or_tmp_name = addr_tmp_name;
    }

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
				  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
				and_tmp_name, ptrsize_zero);
  chain_cond_expr (cond_expr, part_cond_expr);
}
/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
   create a tree representation of:  (&A1 != &B1) && ... && (&An != &Bn).
   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and this new condition are true.  Treat a null *COND_EXPR as "true".  */
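/* For example (illustration only), with pairs <a, b> and <c, d> the
   condition built below is the tree form of

     &a != &b && &c != &d

   chained onto whatever checks *COND_EXPR already holds via
   chain_cond_expr.  */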
static void
vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
{
  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  unsigned int i;
  vec_object_pair *pair;
  FOR_EACH_VEC_ELT (pairs, i, pair)
    {
      tree addr1 = build_fold_addr_expr (pair->first);
      tree addr2 = build_fold_addr_expr (pair->second);
      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
					 addr1, addr2);
      chain_cond_expr (cond_expr, part_cond_expr);
    }
}
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.  If it is NULL, then the function
		is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
		to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

  if (comp_alias_ddrs.is_empty ())
    return;

  create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
			       &comp_alias_ddrs, cond_expr);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());
}
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests pass.

   The test generated to check which version of the loop is executed
   is modified to also check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
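/* Sketch of the run-time structure this function sets up (illustration
   only):

     if (profitability threshold && niter assumptions
	 && alignment checks && alias checks)
       <loop to be vectorized>
     else
       <scalar fallback copy>

   where each sub-condition is emitted only when the corresponding
   version_niter / version_align / version_alias flag (or the
   profitability check) applies.  */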
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability,
		      poly_uint64 versioning_threshold)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  profile_probability prob = profile_probability::likely ();
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  if (check_profitability)
    cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));
  if (maybe_ne (versioning_threshold, 0U))
    {
      tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
			       build_int_cst (TREE_TYPE (scalar_loop_iters),
					      versioning_threshold - 1));
      if (cond_expr)
	cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
				 expr, cond_expr);
      else
	cond_expr = expr;
    }

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    {
      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    }

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies, we need to
	 scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
			    prob, prob.invert (), prob, prob.invert (), true);
      scale_loop_frequencies (loop, prob);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       single_pred (preheader));
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
			  prob, prob.invert (), prob, prob.invert (), true);
  if (version_niter)
    {
      /* The versioned loop could be infinite, we need to clear existing
	 niter information which is copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization to enhance "
			 "alignment\n");
    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from loop, but scalar_loop instead,
     merge_bb will already have just a single predecessor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}