gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "cfghooks.h"
  29 #include "tree-pass.h"
  30 #include "ssa.h"
  31 #include "fold-const.h"
  32 #include "cfganal.h"
  33 #include "gimplify.h"
  34 #include "gimple-iterator.h"
  35 #include "gimplify-me.h"
  36 #include "tree-cfg.h"
  37 #include "tree-ssa-loop-manip.h"
  38 #include "tree-into-ssa.h"
  39 #include "tree-ssa.h"
  40 #include "cfgloop.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "tree-vectorizer.h"
  43 #include "tree-ssa-loop-ivopts.h"
  44
  45 /*************************************************************************
  46   Simple Loop Peeling Utilities
  47
  48   Utilities to support loop peeling for vectorization purposes.
  49  *************************************************************************/
  50
  51
  52 /* Renames the use *OP_P.  */
  53
  54 static void
  55 rename_use_op (use_operand_p op_p)
  56 {
  57   tree new_name;
  58
  59   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  60     return;
  61
  62   new_name = get_current_def (USE_FROM_PTR (op_p));
  63
  64   /* Something defined outside of the loop.  */
  65   if (!new_name)
  66     return;
  67
  68   /* An ordinary ssa name defined in the loop.  */
  69
  70   SET_USE (op_p, new_name);
  71 }
  72
  73
  74 /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
  75    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
  76    true.  */
  77
  78 static void
  79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
  80 {
  81   gimple *stmt;
  82   use_operand_p use_p;
  83   ssa_op_iter iter;
  84   edge e;
  85   edge_iterator ei;
  86   struct loop *loop = bb->loop_father;
  87   struct loop *outer_loop = NULL;
  88
  89   if (rename_from_outer_loop)
  90     {
  91       gcc_assert (loop);
  92       outer_loop = loop_outer (loop);
  93     }
  94
  95   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
  96        gsi_next (&gsi))
  97     {
  98       stmt = gsi_stmt (gsi);
  99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 100         rename_use_op (use_p);
 101     }
 102
 103   FOR_EACH_EDGE (e, ei, bb->preds)
 104     {
 105       if (!flow_bb_inside_loop_p (loop, e->src))
 106         {
 107           if (!rename_from_outer_loop)
 108             continue;
 109           if (e->src != outer_loop->header)
 110             {
 111               if (outer_loop->inner->next)
 112                 {
 113                   /* If outer_loop has 2 inner loops, allow there to
 114                      be an extra basic block which decides which of the
 115                      two loops to use using LOOP_VECTORIZED.  */
 116                   if (!single_pred_p (e->src)
 117                       || single_pred (e->src) != outer_loop->header)
 118                     continue;
 119                 }
 120             }
 121         }
 122       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 123            gsi_next (&gsi))
 124         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 125     }
 126 }
 127
 128
 129 struct adjust_info
 130 {
 131   tree from, to;
 132   basic_block bb;
 133 };
 134
 135 /* A stack of values to be adjusted in debug stmts.  We have to
 136    process them LIFO, so that the closest substitution applies.  If we
 137    processed them FIFO, without the stack, we might substitute uses
 138    with a PHI DEF that would soon become non-dominant, and when we got
 139    to the suitable one, it wouldn't have anything to substitute any
 140    more.  */
 141 static vec<adjust_info, va_heap> adjust_vec;
 142
 143 /* Adjust any debug stmts that referenced AI->from values to use the
 144    loop-closed AI->to, if the references are dominated by AI->bb and
 145    not by the definition of AI->from.  */
 146
 147 static void
 148 adjust_debug_stmts_now (adjust_info *ai)
 149 {
 150   basic_block bbphi = ai->bb;
 151   tree orig_def = ai->from;
 152   tree new_def = ai->to;
 153   imm_use_iterator imm_iter;
 154   gimple *stmt;
 155   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 156
 157   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 158
 159   /* Adjust any debug stmts that held onto non-loop-closed
 160      references.  */
 161   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 162     {
 163       use_operand_p use_p;
 164       basic_block bbuse;
 165
 166       if (!is_gimple_debug (stmt))
 167         continue;
 168
 169       gcc_assert (gimple_debug_bind_p (stmt));
 170
 171       bbuse = gimple_bb (stmt);
 172
 173       if ((bbuse == bbphi
 174            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 175           && !(bbuse == bbdef
 176                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 177         {
 178           if (new_def)
 179             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 180               SET_USE (use_p, new_def);
 181           else
 182             {
 183               gimple_debug_bind_reset_value (stmt);
 184               update_stmt (stmt);
 185             }
 186         }
 187     }
 188 }
 189
 190 /* Adjust debug stmts as scheduled before.  */
 191
 192 static void
 193 adjust_vec_debug_stmts (void)
 194 {
 195   if (!MAY_HAVE_DEBUG_STMTS)
 196     return;
 197
 198   gcc_assert (adjust_vec.exists ());
 199
 200   while (!adjust_vec.is_empty ())
 201     {
 202       adjust_debug_stmts_now (&adjust_vec.last ());
 203       adjust_vec.pop ();
 204     }
 205 }
 206
 207 /* Adjust any debug stmts that referenced FROM values to use the
 208    loop-closed TO, if the references are dominated by BB and not by
 209    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 210    will be postponed until adjust_vec_debug_stmts is called.  */
 211
 212 static void
 213 adjust_debug_stmts (tree from, tree to, basic_block bb)
 214 {
 215   adjust_info ai;
 216
 217   if (MAY_HAVE_DEBUG_STMTS
 218       && TREE_CODE (from) == SSA_NAME
 219       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 220       && ! virtual_operand_p (from))
 221     {
 222       ai.from = from;
 223       ai.to = to;
 224       ai.bb = bb;
 225
 226       if (adjust_vec.exists ())
 227         adjust_vec.safe_push (ai);
 228       else
 229         adjust_debug_stmts_now (&ai);
 230     }
 231 }
 232
 233 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 234    to adjust any debug stmts that referenced the old phi arg,
 235    presumably non-loop-closed references left over from other
 236    transformations.  */
 237
 238 static void
 239 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
 240 {
 241   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 242
 243   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 244
 245   if (MAY_HAVE_DEBUG_STMTS)
 246     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 247                         gimple_bb (update_phi));
 248 }
 249
 250 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 251    that starts at zero, increases by one and its limit is NITERS.
 252
 253    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 254
 255 void
 256 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 257 {
 258   tree indx_before_incr, indx_after_incr;
 259   gcond *cond_stmt;
 260   gcond *orig_cond;
 261   edge exit_edge = single_exit (loop);
 262   gimple_stmt_iterator loop_cond_gsi;
 263   gimple_stmt_iterator incr_gsi;
 264   bool insert_after;
 265   tree init = build_int_cst (TREE_TYPE (niters), 0);
 266   tree step = build_int_cst (TREE_TYPE (niters), 1);
 267   source_location loop_loc;
 268   enum tree_code code;
 269
 270   orig_cond = get_loop_exit_condition (loop);
 271   gcc_assert (orig_cond);
 272   loop_cond_gsi = gsi_for_stmt (orig_cond);
 273
 274   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 275   create_iv (init, step, NULL_TREE, loop,
 276              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 277
 278   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 279                                               true, NULL_TREE, true,
 280                                               GSI_SAME_STMT);
 281   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 282                                      true, GSI_SAME_STMT);
 283
 284   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 285   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 286                                  NULL_TREE);
 287
 288   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 289
 290   /* Remove old loop exit test:  */
 291   gsi_remove (&loop_cond_gsi, true);
 292   free_stmt_vec_info (orig_cond);
 293
 294   loop_loc = find_loop_location (loop);
 295   if (dump_enabled_p ())
 296     {
 297       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 298         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 299                      LOCATION_LINE (loop_loc));
 300       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 301     }
 302
 303   /* Record the number of latch iterations.  */
 304   loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
 305                                      build_int_cst (TREE_TYPE (niters), 1));
 306 }
 307
 308 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 309    For all PHI arguments in FROM->dest and TO->dest from those
 310    edges ensure that TO->dest PHI arguments have current_def
 311    to that in from.  */
 312
 313 static void
 314 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 315 {
 316   gimple_stmt_iterator gsi_from, gsi_to;
 317
 318   for (gsi_from = gsi_start_phis (from->dest),
 319        gsi_to = gsi_start_phis (to->dest);
 320        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
 321     {
 322       gimple *from_phi = gsi_stmt (gsi_from);
 323       gimple *to_phi = gsi_stmt (gsi_to);
 324       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 325       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 326       if (virtual_operand_p (from_arg))
 327         {
 328           gsi_next (&gsi_from);
 329           continue;
 330         }
 331       if (virtual_operand_p (to_arg))
 332         {
 333           gsi_next (&gsi_to);
 334           continue;
 335         }
 336       if (TREE_CODE (from_arg) != SSA_NAME)
 337         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
 338       else
 339         {
 340           if (get_current_def (to_arg) == NULL_TREE)
 341             set_current_def (to_arg, get_current_def (from_arg));
 342         }
 343       gsi_next (&gsi_from);
 344       gsi_next (&gsi_to);
 345     }
 346
 347   gphi *from_phi = get_virtual_phi (from->dest);
 348   gphi *to_phi = get_virtual_phi (to->dest);
 349   if (from_phi)
 350     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
 351                      get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
 352 }
 353
 354
 355 /* Given LOOP this function generates a new copy of it and puts it
 356    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 357    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 358    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 359    entry or exit of LOOP.  */
 360
 361 struct loop *
 362 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 363                                         struct loop *scalar_loop, edge e)
 364 {
 365   struct loop *new_loop;
 366   basic_block *new_bbs, *bbs, *pbbs;
 367   bool at_exit;
 368   bool was_imm_dom;
 369   basic_block exit_dest;
 370   edge exit, new_exit;
 371   bool duplicate_outer_loop = false;
 372
 373   exit = single_exit (loop);
 374   at_exit = (e == exit);
 375   if (!at_exit && e != loop_preheader_edge (loop))
 376     return NULL;
 377
 378   if (scalar_loop == NULL)
 379     scalar_loop = loop;
 380
 381   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 382   pbbs = bbs + 1;
 383   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
 384   /* Allow duplication of outer loops.  */
 385   if (scalar_loop->inner)
 386     duplicate_outer_loop = true;
 387   /* Check whether duplication is possible.  */
 388   if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
 389     {
 390       free (bbs);
 391       return NULL;
 392     }
 393
 394   /* Generate new loop structure.  */
 395   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 396   duplicate_subloops (scalar_loop, new_loop);
 397
 398   exit_dest = exit->dest;
 399   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 400                                           exit_dest) == loop->header ?
 401                  true : false);
 402
 403   /* Also copy the pre-header, this avoids jumping through hoops to
 404      duplicate the loop entry PHI arguments.  Create an empty
 405      pre-header unconditionally for this.  */
 406   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 407   edge entry_e = single_pred_edge (preheader);
 408   bbs[0] = preheader;
 409   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 410
 411   exit = single_exit (scalar_loop);
 412   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 413             &exit, 1, &new_exit, NULL,
 414             at_exit ? loop->latch : e->src, true);
 415   exit = single_exit (loop);
 416   basic_block new_preheader = new_bbs[0];
 417
 418   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 419
 420   if (scalar_loop != loop)
 421     {
 422       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 423          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 424          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 425          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 426          header) to have current_def set, so copy them over.  */
 427       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 428                                                 exit);
 429       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 430                                                            0),
 431                                                 EDGE_SUCC (loop->latch, 0));
 432     }
 433
 434   if (at_exit) /* Add the loop copy at exit.  */
 435     {
 436       if (scalar_loop != loop)
 437         {
 438           gphi_iterator gsi;
 439           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 440
 441           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 442                gsi_next (&gsi))
 443             {
 444               gphi *phi = gsi.phi ();
 445               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 446               location_t orig_locus
 447                 = gimple_phi_arg_location_from_edge (phi, e);
 448
 449               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 450             }
 451         }
 452       redirect_edge_and_branch_force (e, new_preheader);
 453       flush_pending_stmts (e);
 454       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 455       if (was_imm_dom || duplicate_outer_loop)
 456         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 457
 458       /* And remove the non-necessary forwarder again.  Keep the other
 459          one so we have a proper pre-header for the loop at the exit edge.  */
 460       redirect_edge_pred (single_succ_edge (preheader),
 461                           single_pred (preheader));
 462       delete_basic_block (preheader);
 463       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 464                                loop_preheader_edge (scalar_loop)->src);
 465     }
 466   else /* Add the copy at entry.  */
 467     {
 468       if (scalar_loop != loop)
 469         {
 470           /* Remove the non-necessary forwarder of scalar_loop again.  */
 471           redirect_edge_pred (single_succ_edge (preheader),
 472                               single_pred (preheader));
 473           delete_basic_block (preheader);
 474           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 475                                    loop_preheader_edge (scalar_loop)->src);
 476           preheader = split_edge (loop_preheader_edge (loop));
 477           entry_e = single_pred_edge (preheader);
 478         }
 479
 480       redirect_edge_and_branch_force (entry_e, new_preheader);
 481       flush_pending_stmts (entry_e);
 482       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 483
 484       redirect_edge_and_branch_force (new_exit, preheader);
 485       flush_pending_stmts (new_exit);
 486       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 487
 488       /* And remove the non-necessary forwarder again.  Keep the other
 489          one so we have a proper pre-header for the loop at the exit edge.  */
 490       redirect_edge_pred (single_succ_edge (new_preheader),
 491                           single_pred (new_preheader));
 492       delete_basic_block (new_preheader);
 493       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 494                                loop_preheader_edge (new_loop)->src);
 495     }
 496
 497   /* Skip new preheader since it's deleted if copy loop is added at entry.  */
 498   for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
 499     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
 500
 501   if (scalar_loop != loop)
 502     {
 503       /* Update new_loop->header PHIs, so that on the preheader
 504          edge they are the ones from loop rather than scalar_loop.  */
 505       gphi_iterator gsi_orig, gsi_new;
 506       edge orig_e = loop_preheader_edge (loop);
 507       edge new_e = loop_preheader_edge (new_loop);
 508
 509       for (gsi_orig = gsi_start_phis (loop->header),
 510            gsi_new = gsi_start_phis (new_loop->header);
 511            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 512            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 513         {
 514           gphi *orig_phi = gsi_orig.phi ();
 515           gphi *new_phi = gsi_new.phi ();
 516           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 517           location_t orig_locus
 518             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 519
 520           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 521         }
 522     }
 523
 524   free (new_bbs);
 525   free (bbs);
 526
 527   checking_verify_dominators (CDI_DOMINATORS);
 528
 529   return new_loop;
 530 }
 531
 532
 533 /* Given the condition expression COND, put it as the last statement of
 534    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
 535    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
 536    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
 537    new edge as irreducible if IRREDUCIBLE_P is true.  */
 538
 539 static edge
 540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 541                        basic_block guard_to, basic_block dom_bb,
 542                        profile_probability probability, bool irreducible_p)
 543 {
 544   gimple_stmt_iterator gsi;
 545   edge new_e, enter_e;
 546   gcond *cond_stmt;
 547   gimple_seq gimplify_stmt_list = NULL;
 548
 549   enter_e = EDGE_SUCC (guard_bb, 0);
 550   enter_e->flags &= ~EDGE_FALLTHRU;
 551   enter_e->flags |= EDGE_FALSE_VALUE;
 552   gsi = gsi_last_bb (guard_bb);
 553
 554   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 555                                  NULL_TREE);
 556   if (gimplify_stmt_list)
 557     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 558
 559   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 560   gsi = gsi_last_bb (guard_bb);
 561   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 562
 563   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 564   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
 565
 566   new_e->probability = probability;
 567   if (irreducible_p)
 568     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
 569
 570   enter_e->probability = probability.invert ();
 571   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
 572
 573   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
 574   if (enter_e->dest->loop_father->header == enter_e->dest)
 575     split_edge (enter_e);
 576
 577   return new_e;
 578 }
 579
 580
 581 /* This function verifies that the following restrictions apply to LOOP:
 582    (1) it consists of exactly 2 basic blocks - header, and an empty latch
 583        for innermost loop and 5 basic blocks for outer-loop.
 584    (2) it is single entry, single exit
 585    (3) its exit condition is the last stmt in the header
 586    (4) E is the entry/exit edge of LOOP.
 587  */
 588
 589 bool
 590 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 591 {
 592   edge exit_e = single_exit (loop);
 593   edge entry_e = loop_preheader_edge (loop);
 594   gcond *orig_cond = get_loop_exit_condition (loop);
 595   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 596   unsigned int num_bb = loop->inner? 5 : 2;
 597
 598   /* All loops have an outer scope; the only case loop->outer is NULL is for
 599      the function itself.  */
 600   if (!loop_outer (loop)
 601       || loop->num_nodes != num_bb
 602       || !empty_block_p (loop->latch)
 603       || !single_exit (loop)
 604       /* Verify that new loop exit condition can be trivially modified.  */
 605       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 606       || (e != exit_e && e != entry_e))
 607     return false;
 608
 609   return true;
 610 }
 611
 612 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
 613    in the exit bb and rename all the uses after the loop.  This simplifies
 614    the *guard[12] routines, which assume loop closed SSA form for all PHIs
 615    (but normally loop closed SSA form doesn't require virtual PHIs to be
 616    in the same form).  Doing this early simplifies the checking what
 617    uses should be renamed.  */
 618
 619 static void
 620 create_lcssa_for_virtual_phi (struct loop *loop)
 621 {
 622   gphi_iterator gsi;
 623   edge exit_e = single_exit (loop);
 624
 625   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
 626     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 627       {
 628         gphi *phi = gsi.phi ();
 629         for (gsi = gsi_start_phis (exit_e->dest);
 630              !gsi_end_p (gsi); gsi_next (&gsi))
 631           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 632             break;
 633         if (gsi_end_p (gsi))
 634           {
 635             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
 636             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
 637             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
 638             imm_use_iterator imm_iter;
 639             gimple *stmt;
 640             use_operand_p use_p;
 641
 642             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
 643             gimple_phi_set_result (new_phi, new_vop);
 644             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
 645               if (stmt != new_phi
 646                   && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
 647                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 648                   SET_USE (use_p, new_vop);
 649           }
 650         break;
 651       }
 652
 653 }
 654
 655 /* Function vect_get_loop_location.
 656
 657    Extract the location of the loop in the source code.
 658    If the loop is not well formed for vectorization, an estimated
 659    location is calculated.
 660    Return the loop location if succeed and NULL if not.  */
 661
 662 source_location
 663 find_loop_location (struct loop *loop)
 664 {
 665   gimple *stmt = NULL;
 666   basic_block bb;
 667   gimple_stmt_iterator si;
 668
 669   if (!loop)
 670     return UNKNOWN_LOCATION;
 671
 672   stmt = get_loop_exit_condition (loop);
 673
 674   if (stmt
 675       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 676     return gimple_location (stmt);
 677
 678   /* If we got here the loop is probably not "well formed",
 679      try to estimate the loop location */
 680
 681   if (!loop->header)
 682     return UNKNOWN_LOCATION;
 683
 684   bb = loop->header;
 685
 686   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 687     {
 688       stmt = gsi_stmt (si);
 689       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 690         return gimple_location (stmt);
 691     }
 692
 693   return UNKNOWN_LOCATION;
 694 }
 695
 696 /* Return true if PHI defines an IV of the loop to be vectorized.  */
 697
 698 static bool
 699 iv_phi_p (gphi *phi)
 700 {
 701   if (virtual_operand_p (PHI_RESULT (phi)))
 702     return false;
 703
 704   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 705   gcc_assert (stmt_info != NULL);
 706   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
 707       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 708     return false;
 709
 710   return true;
 711 }
 712
 713 /* Function vect_can_advance_ivs_p
 714
 715    In case the number of iterations that LOOP iterates is unknown at compile
 716    time, an epilog loop will be generated, and the loop induction variables
 717    (IVs) will be "advanced" to the value they are supposed to take just before
 718    the epilog loop.  Here we check that the access function of the loop IVs
 719    and the expression that represents the loop bound are simple enough.
 720    These restrictions will be relaxed in the future.  */
 721
 722 bool
 723 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 724 {
 725   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 726   basic_block bb = loop->header;
 727   gphi_iterator gsi;
 728
 729   /* Analyze phi functions of the loop header.  */
 730
 731   if (dump_enabled_p ())
 732     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
 733   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 734     {
 735       tree evolution_part;
 736
 737       gphi *phi = gsi.phi ();
 738       if (dump_enabled_p ())
 739         {
 740           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 741           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 742         }
 743
 744       /* Skip virtual phi's. The data dependences that are associated with
 745          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
 746
 747          Skip reduction phis.  */
 748       if (!iv_phi_p (phi))
 749         {
 750           if (dump_enabled_p ())
 751             dump_printf_loc (MSG_NOTE, vect_location,
 752                              "reduc or virtual phi. skip.\n");
 753           continue;
 754         }
 755
 756       /* Analyze the evolution function.  */
 757
 758       evolution_part
 759         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 760       if (evolution_part == NULL_TREE)
 761         {
 762           if (dump_enabled_p ())
 763             dump_printf (MSG_MISSED_OPTIMIZATION,
 764                          "No access function or evolution.\n");
 765           return false;
 766         }
 767
 768       /* FORNOW: We do not transform initial conditions of IVs
 769          which evolution functions are not invariants in the loop.  */
 770
 771       if (!expr_invariant_in_loop_p (loop, evolution_part))
 772         {
 773           if (dump_enabled_p ())
 774             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 775                              "evolution not invariant in loop.\n");
 776           return false;
 777         }
 778
 779       /* FORNOW: We do not transform initial conditions of IVs
 780          which evolution functions are a polynomial of degree >= 2.  */
 781
 782       if (tree_is_chrec (evolution_part))
 783         {
 784           if (dump_enabled_p ())
 785             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 786                              "evolution is chrec.\n");
 787           return false;
 788         }
 789     }
 790
 791   return true;
 792 }
 793
 794
 795 /*   Function vect_update_ivs_after_vectorizer.
 796
 797      "Advance" the induction variables of LOOP to the value they should take
 798      after the execution of LOOP.  This is currently necessary because the
 799      vectorizer does not handle induction variables that are used after the
 800      loop.  Such a situation occurs when the last iterations of LOOP are
 801      peeled, because:
 802      1. We introduced new uses after LOOP for IVs that were not originally used
 803         after LOOP: the IVs of LOOP are now used by an epilog loop.
 804      2. LOOP is going to be vectorized; this means that it will iterate N/VF
 805         times, whereas the loop IVs should be bumped N times.
 806
 807      Input:
 808      - LOOP - a loop that is going to be vectorized. The last few iterations
 809               of LOOP were peeled.
 810      - NITERS - the number of iterations that LOOP executes (before it is
 811                 vectorized). i.e, the number of times the ivs should be bumped.
 812      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
 813                   coming out from LOOP on which there are uses of the LOOP ivs
 814                   (this is the path from LOOP->exit to epilog_loop->preheader).
 815
 816                   The new definitions of the ivs are placed in LOOP->exit.
 817                   The phi args associated with the edge UPDATE_E in the bb
 818                   UPDATE_E->dest are updated accordingly.
 819
 820      Assumption 1: Like the rest of the vectorizer, this function assumes
 821      a single loop exit that has a single predecessor.
 822
 823      Assumption 2: The phi nodes in the LOOP header and in update_bb are
 824      organized in the same order.
 825
 826      Assumption 3: The access function of the ivs is simple enough (see
 827      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
 828
 829      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
 830      coming out of LOOP on which the ivs of LOOP are used (this is the path
 831      that leads to the epilog loop; other paths skip the epilog loop).  This
 832      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
 833      needs to have its phis updated.
 834  */
 835
 836 static void
 837 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
 838                                   tree niters, edge update_e)
 839 {
 840   gphi_iterator gsi, gsi1;
 841   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 842   basic_block update_bb = update_e->dest;
 843   basic_block exit_bb = single_exit (loop)->dest;
 844
 845   /* Make sure there exists a single-predecessor exit bb:  */
 846   gcc_assert (single_pred_p (exit_bb));
 847   gcc_assert (single_succ_edge (exit_bb) == update_e);
 848
 849   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
 850        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
 851        gsi_next (&gsi), gsi_next (&gsi1))
 852     {
 853       tree init_expr;
 854       tree step_expr, off;
 855       tree type;
 856       tree var, ni, ni_name;
 857       gimple_stmt_iterator last_gsi;
 858
 859       gphi *phi = gsi.phi ();
 860       gphi *phi1 = gsi1.phi ();
 861       if (dump_enabled_p ())
 862         {
 863           dump_printf_loc (MSG_NOTE, vect_location,
 864                            "vect_update_ivs_after_vectorizer: phi: ");
 865           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 866         }
 867
 868       /* Skip reduction and virtual phis.  */
 869       if (!iv_phi_p (phi))
 870         {
 871           if (dump_enabled_p ())
 872             dump_printf_loc (MSG_NOTE, vect_location,
 873                              "reduc or virtual phi. skip.\n");
 874           continue;
 875         }
 876
 877       type = TREE_TYPE (gimple_phi_result (phi));
 878       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 879       step_expr = unshare_expr (step_expr);
 880
 881       /* FORNOW: We do not support IVs whose evolution function is a polynomial
 882          of degree >= 2 or exponential.  */
 883       gcc_assert (!tree_is_chrec (step_expr));
 884
 885       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 886
 887       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 888                          fold_convert (TREE_TYPE (step_expr), niters),
 889                          step_expr);
 890       if (POINTER_TYPE_P (type))
 891         ni = fold_build_pointer_plus (init_expr, off);
 892       else
 893         ni = fold_build2 (PLUS_EXPR, type,
 894                           init_expr, fold_convert (type, off));
 895
 896       var = create_tmp_var (type, "tmp");
 897
 898       last_gsi = gsi_last_bb (exit_bb);
 899       gimple_seq new_stmts = NULL;
 900       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
 901       /* Exit_bb shouldn't be empty.  */
 902       if (!gsi_end_p (last_gsi))
 903         gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
 904       else
 905         gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
 906
 907       /* Fix phi expressions in the successor bb.  */
 908       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
 909     }
 910 }
 911
 912 /* Function vect_gen_prolog_loop_niters
 913
 914    Generate the number of iterations which should be peeled as prolog for the
 915    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
 916    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
 917    As a result, after the execution of this loop, the data reference DR will
 918    refer to an aligned location.  The following computation is generated:
 919
 920    If the misalignment of DR is known at compile time:
 921      addr_mis = int mis = DR_MISALIGNMENT (dr);
 922    Else, compute address misalignment in bytes:
 923      addr_mis = addr & (vectype_align - 1)
 924
 925    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
 926
 927    (elem_size = element type size; an element is the scalar element whose type
 928    is the inner type of the vectype)
 929
 930    The computations will be emitted at the end of BB.  We also compute and
 931    store upper bound (included) of the result in BOUND.
 932
 933    When the step of the data-ref in the loop is not 1 (as in interleaved data
 934    and SLP), the number of iterations of the prolog must be divided by the step
 935    (which is equal to the size of interleaved group).
 936
 937    The above formulas assume that VF == number of elements in the vector. This
 938    may not hold when there are multiple-types in the loop.
 939    In this case, for some data-references in the loop the VF does not represent
 940    the number of elements that fit in the vector.  Therefore, instead of VF we
 941    use TYPE_VECTOR_SUBPARTS.  */
 942
 943 static tree
 944 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
 945                              basic_block bb, int *bound)
 946 {
 947   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
 948   tree var;
 949   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
 950   gimple_seq stmts = NULL, new_stmts = NULL;
 951   tree iters, iters_name;
 952   gimple *dr_stmt = DR_STMT (dr);
 953   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
 954   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 955   unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
 956
 957   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
 958     {
 959       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 960
 961       if (dump_enabled_p ())
 962         dump_printf_loc (MSG_NOTE, vect_location,
 963                          "known peeling = %d.\n", npeel);
 964
 965       iters = build_int_cst (niters_type, npeel);
 966       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 967     }
 968   else
 969     {
 970       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
 971       tree offset = negative
 972           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
 973       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
 974                                                               &stmts, offset);
 975       tree type = unsigned_type_for (TREE_TYPE (start_addr));
 976       tree target_align_minus_1 = build_int_cst (type, target_align - 1);
 977       HOST_WIDE_INT elem_size
 978         = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
 979       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
 980       HOST_WIDE_INT align_in_elems = target_align / elem_size;
 981       tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
 982       tree align_in_elems_tree = build_int_cst (type, align_in_elems);
 983       tree misalign_in_bytes;
 984       tree misalign_in_elems;
 985
 986       /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
 987       misalign_in_bytes
 988         = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
 989                        target_align_minus_1);
 990
 991       /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
 992       misalign_in_elems
 993         = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);
 994
 995       /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
 996                                  & (align_in_elems - 1)).  */
 997       if (negative)
 998         iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
 999                              align_in_elems_tree);
1000       else
1001         iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1002                              misalign_in_elems);
1003       iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
1004       iters = fold_convert (niters_type, iters);
1005       *bound = align_in_elems - 1;
1006     }
1007
1008   if (dump_enabled_p ())
1009     {
1010       dump_printf_loc (MSG_NOTE, vect_location,
1011                        "niters for prolog loop: ");
1012       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1013       dump_printf (MSG_NOTE, "\n");
1014     }
1015
1016   var = create_tmp_var (niters_type, "prolog_loop_niters");
1017   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1018
1019   if (new_stmts)
1020     gimple_seq_add_seq (&stmts, new_stmts);
1021   if (stmts)
1022     {
1023       gcc_assert (single_succ_p (bb));
1024       gimple_stmt_iterator gsi = gsi_last_bb (bb);
1025       if (gsi_end_p (gsi))
1026         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1027       else
1028         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1029     }
1030   return iters_name;
1031 }
1032
1033
1034 /* Function vect_update_init_of_dr
1035
1036    NITERS iterations were peeled from LOOP.  DR represents a data reference
1037    in LOOP.  This function updates the information recorded in DR to
1038    account for the fact that the first NITERS iterations had already been
1039    executed.  Specifically, it updates the OFFSET field of DR.  */
1040
1041 static void
1042 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1043 {
1044   tree offset = DR_OFFSET (dr);
1045
1046   niters = fold_build2 (MULT_EXPR, sizetype,
1047                         fold_convert (sizetype, niters),
1048                         fold_convert (sizetype, DR_STEP (dr)));
1049   offset = fold_build2 (PLUS_EXPR, sizetype,
1050                         fold_convert (sizetype, offset), niters);
1051   DR_OFFSET (dr) = offset;
1052 }
1053
1054
1055 /* Function vect_update_inits_of_drs
1056
1057    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1058    This function updates the information recorded for the data references in
1059    the loop to account for the fact that the first NITERS iterations had
1060    already been executed.  Specifically, it updates the initial_condition of
1061    the access_function of all the data_references in the loop.  */
1062
1063 static void
1064 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1065 {
1066   unsigned int i;
1067   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1068   struct data_reference *dr;
1069
1070   if (dump_enabled_p ())
1071     dump_printf_loc (MSG_NOTE, vect_location,
1072                      "=== vect_update_inits_of_dr ===\n");
1073
1074   /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
1075   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1076     {
1077       gimple_seq seq;
1078       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1079       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1080
1081       niters = fold_convert (sizetype, niters);
1082       niters = force_gimple_operand (niters, &seq, false, var);
1083       if (seq)
1084         {
1085           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1086           gcc_assert (!new_bb);
1087         }
1088     }
1089
1090   FOR_EACH_VEC_ELT (datarefs, i, dr)
1091     vect_update_init_of_dr (dr, niters);
1092 }
1093
1094
1095 /* This function builds ni_name = number of iterations.  Statements
1096    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
1097    it to TRUE if new ssa_var is generated.  */
1098
1099 tree
1100 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
1101 {
1102   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1103   if (TREE_CODE (ni) == INTEGER_CST)
1104     return ni;
1105   else
1106     {
1107       tree ni_name, var;
1108       gimple_seq stmts = NULL;
1109       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1110
1111       var = create_tmp_var (TREE_TYPE (ni), "niters");
1112       ni_name = force_gimple_operand (ni, &stmts, false, var);
1113       if (stmts)
1114         {
1115           gsi_insert_seq_on_edge_immediate (pe, stmts);
1116           if (new_var_p != NULL)
1117             *new_var_p = true;
1118         }
1119
1120       return ni_name;
1121     }
1122 }
1123
1124 /* Calculate the number of iterations above which vectorized loop will be
1125    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
1126    of prolog loop.  If it's integer const, the integer number is also passed
1127    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
1128    number of iterations of prolog loop.  VFM1 is vector factor minus one.
1129    If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1130    (rather than vectorized) loop will be executed.  This function stores
1131    upper bound (included) of the result in BOUND_SCALAR.  */
1132
1133 static tree
1134 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1135                              int bound_prolog, int vfm1, int th,
1136                              int *bound_scalar, bool check_profitability)
1137 {
1138   tree type = TREE_TYPE (niters_prolog);
1139   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1140                              build_int_cst (type, vfm1));
1141
1142   *bound_scalar = vfm1 + bound_prolog;
1143   if (check_profitability)
1144     {
1145       /* TH indicates the minimum niters of vectorized loop, while we
1146          compute the maximum niters of scalar loop.  */
1147       th--;
1148       /* Peeling for constant times.  */
1149       if (int_niters_prolog >= 0)
1150         {
1151           *bound_scalar = (int_niters_prolog + vfm1 < th
1152                             ? th
1153                             : vfm1 + int_niters_prolog);
1154           return build_int_cst (type, *bound_scalar);
1155         }
1156       /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
1157          bound (inlcuded) of niters of prolog loop.  */
1158       if (th >=  vfm1 + bound_prolog)
1159         {
1160           *bound_scalar = th;
1161           return build_int_cst (type, th);
1162         }
1163       /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
1164       else if (th > vfm1)
1165         return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1166     }
1167   return niters;
1168 }
1169
1170 /* This function generates the following statements:
1171
1172    niters = number of iterations loop executes (after peeling)
1173    niters_vector = niters / vf
1174
1175    and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
1176    true if NITERS doesn't overflow.  */
1177
1178 void
1179 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1180                              tree *niters_vector_ptr, bool niters_no_overflow)
1181 {
1182   tree ni_minus_gap, var;
1183   tree niters_vector, type = TREE_TYPE (niters);
1184   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1185   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1186   tree log_vf = build_int_cst (type, exact_log2 (vf));
1187
1188   /* If epilogue loop is required because of data accesses with gaps, we
1189      subtract one iteration from the total number of iterations here for
1190      correct calculation of RATIO.  */
1191   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1192     {
1193       ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1194                                   build_one_cst (type));
1195       if (!is_gimple_val (ni_minus_gap))
1196         {
1197           var = create_tmp_var (type, "ni_gap");
1198           gimple *stmts = NULL;
1199           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1200                                                true, var);
1201           gsi_insert_seq_on_edge_immediate (pe, stmts);
1202         }
1203     }
1204   else
1205     ni_minus_gap = niters;
1206
1207   /* Create: niters >> log2(vf) */
1208   /* If it's known that niters == number of latch executions + 1 doesn't
1209      overflow, we can generate niters >> log2(vf); otherwise we generate
1210      (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1211      will be at least one.  */
1212   if (niters_no_overflow)
1213     niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1214   else
1215     niters_vector
1216       = fold_build2 (PLUS_EXPR, type,
1217                      fold_build2 (RSHIFT_EXPR, type,
1218                                   fold_build2 (MINUS_EXPR, type, ni_minus_gap,
1219                                                build_int_cst (type, vf)),
1220                                   log_vf),
1221                      build_int_cst (type, 1));
1222
1223   if (!is_gimple_val (niters_vector))
1224     {
1225       var = create_tmp_var (type, "bnd");
1226       gimple_seq stmts = NULL;
1227       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1228       gsi_insert_seq_on_edge_immediate (pe, stmts);
1229       /* Peeling algorithm guarantees that vector loop bound is at least ONE,
1230          we set range information to make niters analyzer's life easier.  */
1231       if (stmts != NULL)
1232         set_range_info (niters_vector, VR_RANGE,
1233                         wi::to_wide (build_int_cst (type, 1)),
1234                         wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1235                                                   TYPE_MAX_VALUE (type),
1236                                                   log_vf)));
1237     }
1238   *niters_vector_ptr = niters_vector;
1239
1240   return;
1241 }
1242
1243 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1244    loop specified by LOOP_VINFO after vectorization, compute the number
1245    of iterations before vectorization (niters_vector * vf) and store it
1246    to NITERS_VECTOR_MULT_VF_PTR.  */
1247
1248 static void
1249 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1250                                      tree niters_vector,
1251                                      tree *niters_vector_mult_vf_ptr)
1252 {
1253   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1254   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1255   tree type = TREE_TYPE (niters_vector);
1256   tree log_vf = build_int_cst (type, exact_log2 (vf));
1257   basic_block exit_bb = single_exit (loop)->dest;
1258
1259   gcc_assert (niters_vector_mult_vf_ptr != NULL);
1260   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1261                                             niters_vector, log_vf);
1262   if (!is_gimple_val (niters_vector_mult_vf))
1263     {
1264       tree var = create_tmp_var (type, "niters_vector_mult_vf");
1265       gimple_seq stmts = NULL;
1266       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1267                                                     &stmts, true, var);
1268       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1269       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1270     }
1271   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1272 }
1273
1274 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1275    from SECOND/FIRST and puts it at the original loop's preheader/exit
1276    edge, the two loops are arranged as below:
1277
1278        preheader_a:
1279      first_loop:
1280        header_a:
1281          i_1 = PHI<i_0, i_2>;
1282          ...
1283          i_2 = i_1 + 1;
1284          if (cond_a)
1285            goto latch_a;
1286          else
1287            goto between_bb;
1288        latch_a:
1289          goto header_a;
1290
1291        between_bb:
1292          ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,
1293
1294      second_loop:
1295        header_b:
1296          i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1297                                  or with i_2 if no LCSSA phi is created
1298                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
1299          ...
1300          i_4 = i_3 + 1;
1301          if (cond_b)
1302            goto latch_b;
1303          else
1304            goto exit_bb;
1305        latch_b:
1306          goto header_b;
1307
1308        exit_bb:
1309
1310    This function creates loop closed SSA for the first loop; update the
1311    second loop's PHI nodes by replacing argument on incoming edge with the
1312    result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
1313    is false, Loop closed ssa phis will only be created for non-iv phis for
1314    the first loop.
1315
1316    This function assumes exit bb of the first loop is preheader bb of the
1317    second loop, i.e, between_bb in the example code.  With PHIs updated,
1318    the second loop will execute rest iterations of the first.  */
1319
1320 static void
1321 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1322                                    struct loop *first, struct loop *second,
1323                                    bool create_lcssa_for_iv_phis)
1324 {
1325   gphi_iterator gsi_update, gsi_orig;
1326   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1327
1328   edge first_latch_e = EDGE_SUCC (first->latch, 0);
1329   edge second_preheader_e = loop_preheader_edge (second);
1330   basic_block between_bb = single_exit (first)->dest;
1331
1332   gcc_assert (between_bb == second_preheader_e->src);
1333   gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1334   /* Either the first loop or the second is the loop to be vectorized.  */
1335   gcc_assert (loop == first || loop == second);
1336
1337   for (gsi_orig = gsi_start_phis (first->header),
1338        gsi_update = gsi_start_phis (second->header);
1339        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1340        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1341     {
1342       gphi *orig_phi = gsi_orig.phi ();
1343       gphi *update_phi = gsi_update.phi ();
1344
1345       tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1346       /* Generate lcssa PHI node for the first loop.  */
1347       gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1348       if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1349         {
1350           tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1351           gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1352           add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1353           arg = new_res;
1354         }
1355
1356       /* Update PHI node in the second loop by replacing arg on the loop's
1357          incoming edge.  */
1358       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1359     }
1360 }
1361
1362 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1363    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
1364    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
1365    appear like below:
1366
1367        guard_bb:
1368          if (cond)
1369            goto merge_bb;
1370          else
1371            goto skip_loop;
1372
1373      skip_loop:
1374        header_a:
1375          i_1 = PHI<i_0, i_2>;
1376          ...
1377          i_2 = i_1 + 1;
1378          if (cond_a)
1379            goto latch_a;
1380          else
1381            goto exit_a;
1382        latch_a:
1383          goto header_a;
1384
1385        exit_a:
1386          i_5 = PHI<i_2>;
1387
1388        merge_bb:
1389          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1390
1391      update_loop:
1392        header_b:
1393          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
1394          ...
1395          i_4 = i_3 + 1;
1396          if (cond_b)
1397            goto latch_b;
1398          else
1399            goto exit_bb;
1400        latch_b:
1401          goto header_b;
1402
1403        exit_bb:
1404
1405    This function creates PHI nodes at merge_bb and replaces the use of i_5
1406    in the update_loop's PHI node with the result of new PHI result.  */
1407
1408 static void
1409 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1410                                     struct loop *update_loop,
1411                                     edge guard_edge, edge merge_edge)
1412 {
1413   source_location merge_loc, guard_loc;
1414   edge orig_e = loop_preheader_edge (skip_loop);
1415   edge update_e = loop_preheader_edge (update_loop);
1416   gphi_iterator gsi_orig, gsi_update;
1417
1418   for ((gsi_orig = gsi_start_phis (skip_loop->header),
1419         gsi_update = gsi_start_phis (update_loop->header));
1420        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1421        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1422     {
1423       gphi *orig_phi = gsi_orig.phi ();
1424       gphi *update_phi = gsi_update.phi ();
1425
1426       /* Generate new phi node at merge bb of the guard.  */
1427       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1428       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1429
1430       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
1431          args in NEW_PHI for these edges.  */
1432       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1433       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1434       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1435       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1436       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1437       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1438
1439       /* Update phi in UPDATE_PHI.  */
1440       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1441     }
1442 }
1443
1444 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1445    this function searches for the corresponding lcssa phi node in exit
1446    bb of LOOP.  If it is found, return the phi result; otherwise return
1447    NULL.  */
1448
1449 static tree
1450 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1451                 gphi *lcssa_phi)
1452 {
1453   gphi_iterator gsi;
1454   edge e = single_exit (loop);
1455
1456   gcc_assert (single_pred_p (e->dest));
1457   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1458     {
1459       gphi *phi = gsi.phi ();
1460       if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1461                            PHI_ARG_DEF (lcssa_phi, 0), 0))
1462         return PHI_RESULT (phi);
1463     }
1464   return NULL_TREE;
1465 }
1466
1467 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1468    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
1469    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
1470    and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1471    The CFG looks like:
1472
1473      loop:
1474        header_a:
1475          i_1 = PHI<i_0, i_2>;
1476          ...
1477          i_2 = i_1 + 1;
1478          if (cond_a)
1479            goto latch_a;
1480          else
1481            goto exit_a;
1482        latch_a:
1483          goto header_a;
1484
1485        exit_a:
1486
1487        guard_bb:
1488          if (cond)
1489            goto merge_bb;
1490          else
1491            goto epilog_loop;
1492
1493        ;; fall_through_bb
1494
1495      epilog_loop:
1496        header_b:
1497          i_3 = PHI<i_2, i_4>;
1498          ...
1499          i_4 = i_3 + 1;
1500          if (cond_b)
1501            goto latch_b;
1502          else
1503            goto merge_bb;
1504        latch_b:
1505          goto header_b;
1506
1507        merge_bb:
1508          ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1509
1510        exit_bb:
1511          i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.
1512
1513    For each name used out side EPILOG (i.e - for each name that has a lcssa
1514    phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
1515    args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
1516    the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1517    by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
1518    in exit_bb will also be updated.  */
1519
1520 static void
1521 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1522                                     edge guard_edge, edge merge_edge)
1523 {
1524   gphi_iterator gsi;
1525   basic_block merge_bb = guard_edge->dest;
1526
1527   gcc_assert (single_succ_p (merge_bb));
1528   edge e = single_succ_edge (merge_bb);
1529   basic_block exit_bb = e->dest;
1530   gcc_assert (single_pred_p (exit_bb));
1531   gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1532
1533   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1534     {
1535       gphi *update_phi = gsi.phi ();
1536       tree old_arg = PHI_ARG_DEF (update_phi, 0);
1537       /* This loop-closed-phi actually doesn't represent a use out of the
1538          loop - the phi arg is a constant.  */
1539       if (TREE_CODE (old_arg) != SSA_NAME)
1540         continue;
1541
1542       tree merge_arg = get_current_def (old_arg);
1543       if (!merge_arg)
1544         merge_arg = old_arg;
1545
1546       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1547       /* If the var is live after loop but not a reduction, we simply
1548          use the old arg.  */
1549       if (!guard_arg)
1550         guard_arg = old_arg;
1551
1552       /* Create new phi node in MERGE_BB:  */
1553       tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1554       gphi *merge_phi = create_phi_node (new_res, merge_bb);
1555
1556       /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1557          the two PHI args in merge_phi for these edges.  */
1558       add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1559       add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1560
1561       /* Update the original phi in exit_bb.  */
1562       adjust_phi_and_debug_stmts (update_phi, e, new_res);
1563     }
1564 }
1565
1566 /* EPILOG loop is duplicated from the original loop for vectorizing,
1567    the arg of its loop closed ssa PHI needs to be updated.  */
1568
1569 static void
1570 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1571 {
1572   gphi_iterator gsi;
1573   basic_block exit_bb = single_exit (epilog)->dest;
1574
1575   gcc_assert (single_pred_p (exit_bb));
1576   edge e = EDGE_PRED (exit_bb, 0);
1577   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1578     rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1579 }
1580
1581 /* Function vect_do_peeling.
1582
1583    Input:
1584    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1585
1586        preheader:
1587      LOOP:
1588        header_bb:
1589          loop_body
1590          if (exit_loop_cond) goto exit_bb
1591          else                goto header_bb
1592        exit_bb:
1593
1594    - NITERS: The number of iterations of the loop.
1595    - NITERSM1: The number of iterations of the loop's latch.
1596    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1597    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1598                               CHECK_PROFITABILITY is true.
1599    Output:
1600    - NITERS_VECTOR: The number of iterations of loop after vectorization.
1601
1602    This function peels prolog and epilog from the loop, adds guards skipping
1603    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
1604    would look like:
1605
1606        guard_bb_1:
1607          if (prefer_scalar_loop) goto merge_bb_1
1608          else                    goto guard_bb_2
1609
1610        guard_bb_2:
1611          if (skip_prolog) goto merge_bb_2
1612          else             goto prolog_preheader
1613
1614        prolog_preheader:
1615      PROLOG:
1616        prolog_header_bb:
1617          prolog_body
1618          if (exit_prolog_cond) goto prolog_exit_bb
1619          else                  goto prolog_header_bb
1620        prolog_exit_bb:
1621
1622        merge_bb_2:
1623
1624        vector_preheader:
1625      VECTOR LOOP:
1626        vector_header_bb:
1627          vector_body
1628          if (exit_vector_cond) goto vector_exit_bb
1629          else                  goto vector_header_bb
1630        vector_exit_bb:
1631
1632        guard_bb_3:
1633          if (skip_epilog) goto merge_bb_3
1634          else             goto epilog_preheader
1635
1636        merge_bb_1:
1637
1638        epilog_preheader:
1639      EPILOG:
1640        epilog_header_bb:
1641          epilog_body
1642          if (exit_epilog_cond) goto merge_bb_3
1643          else                  goto epilog_header_bb
1644
1645        merge_bb_3:
1646
1647    Note this function peels prolog and epilog only if it's necessary,
1648    as well as guards.
1649    Returns created epilogue or NULL.
1650
1651    TODO: Guard for prefer_scalar_loop should be emitted along with
1652    versioning conditions if loop versioning is needed.  */
1653
1654
1655 struct loop *
1656 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1657                  tree *niters_vector, int th, bool check_profitability,
1658                  bool niters_no_overflow)
1659 {
1660   edge e, guard_e;
1661   tree type = TREE_TYPE (niters), guard_cond;
1662   basic_block guard_bb, guard_to;
1663   profile_probability prob_prolog, prob_vector, prob_epilog;
1664   int bound_prolog = 0, bound_scalar = 0, bound = 0;
1665   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1666   int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1667   bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1668                          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1669
1670   if (!prolog_peeling && !epilog_peeling)
1671     return NULL;
1672
1673   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
1674   if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1675     vf = 3;
1676   prob_prolog = prob_epilog = profile_probability::guessed_always ()
1677                         .apply_scale (vf - 1, vf);
1678   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1679
1680   struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1681   struct loop *first_loop = loop;
1682   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1683   create_lcssa_for_virtual_phi (loop);
1684   update_ssa (TODO_update_ssa_only_virtuals);
1685
1686   if (MAY_HAVE_DEBUG_STMTS)
1687     {
1688       gcc_assert (!adjust_vec.exists ());
1689       adjust_vec.create (32);
1690     }
1691   initialize_original_copy_tables ();
1692
1693   /* Prolog loop may be skipped.  */
1694   bool skip_prolog = (prolog_peeling != 0);
1695   /* Skip to epilog if scalar loop may be preferred.  It's only needed
1696      when we peel for epilog loop and when it hasn't been checked with
1697      loop versioning.  */
1698   bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1699                       && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1700   /* Epilog loop must be executed if the number of iterations for epilog
1701      loop is known at compile time, otherwise we need to add a check at
1702      the end of vector loop and skip to the end of epilog loop.  */
1703   bool skip_epilog = (prolog_peeling < 0
1704                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1705   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
1706   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1707     skip_epilog = false;
1708
1709   /* Record the anchor bb at which guard should be placed if scalar loop
1710      may be preferred.  */
1711   basic_block anchor = loop_preheader_edge (loop)->src;
1712   if (skip_vector)
1713     {
1714       split_edge (loop_preheader_edge (loop));
1715
1716       /* Due to the order in which we peel prolog and epilog, we first
1717          propagate probability to the whole loop.  The purpose is to
1718          avoid adjusting probabilities of both prolog and vector loops
1719          separately.  Note in this case, the probability of epilog loop
1720          needs to be scaled back later.  */
1721       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1722       if (prob_vector.initialized_p ())
1723       scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1724       scale_loop_profile (loop, prob_vector, bound);
1725     }
1726
1727   tree niters_prolog = build_int_cst (type, 0);
1728   source_location loop_loc = find_loop_location (loop);
1729   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1730   if (prolog_peeling)
1731     {
1732       e = loop_preheader_edge (loop);
1733       if (!slpeel_can_duplicate_loop_p (loop, e))
1734         {
1735           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1736                            "loop can't be duplicated to preheader edge.\n");
1737           gcc_unreachable ();
1738         }
1739       /* Peel prolog and put it on preheader edge of loop.  */
1740       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1741       if (!prolog)
1742         {
1743           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1744                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1745           gcc_unreachable ();
1746         }
1747       slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1748       first_loop = prolog;
1749       reset_original_copy_tables ();
1750
1751       /* Generate and update the number of iterations for prolog loop.  */
1752       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1753                                                    &bound_prolog);
1754       slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1755
1756       /* Skip the prolog loop.  */
1757       if (skip_prolog)
1758         {
1759           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1760                                     niters_prolog, build_int_cst (type, 0));
1761           guard_bb = loop_preheader_edge (prolog)->src;
1762           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1763           guard_to = split_edge (loop_preheader_edge (loop));
1764           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1765                                            guard_to, guard_bb,
1766                                            prob_prolog.invert (),
1767                                            irred_flag);
1768           e = EDGE_PRED (guard_to, 0);
1769           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1770           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1771
1772           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1773           scale_loop_profile (prolog, prob_prolog, bound_prolog);
1774         }
1775       /* Update init address of DRs.  */
1776       vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1777       /* Update niters for vector loop.  */
1778       LOOP_VINFO_NITERS (loop_vinfo)
1779         = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1780       LOOP_VINFO_NITERSM1 (loop_vinfo)
1781         = fold_build2 (MINUS_EXPR, type,
1782                        LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1783       bool new_var_p = false;
1784       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1785       /* It's guaranteed that vector loop bound before vectorization is at
1786          least VF, so set range information for newly generated var.  */
1787       if (new_var_p)
1788         set_range_info (niters, VR_RANGE,
1789                         wi::to_wide (build_int_cst (type, vf)),
1790                         wi::to_wide (TYPE_MAX_VALUE (type)));
1791
1792       /* Prolog iterates at most bound_prolog times, latch iterates at
1793          most bound_prolog - 1 times.  */
1794       record_niter_bound (prolog, bound_prolog - 1, false, true);
1795       delete_update_ssa ();
1796       adjust_vec_debug_stmts ();
1797       scev_reset ();
1798     }
1799
1800   if (epilog_peeling)
1801     {
1802       e = single_exit (loop);
1803       if (!slpeel_can_duplicate_loop_p (loop, e))
1804         {
1805           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1806                            "loop can't be duplicated to exit edge.\n");
1807           gcc_unreachable ();
1808         }
1809       /* Peel epilog and put it on exit edge of loop.  */
1810       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1811       if (!epilog)
1812         {
1813           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1814                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1815           gcc_unreachable ();
1816         }
1817       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1818
1819       /* Scalar version loop may be preferred.  In this case, add guard
1820          and skip to epilog.  Note this only happens when the number of
1821          iterations of loop is unknown at compile time, otherwise this
1822          won't be vectorized.  */
1823       if (skip_vector)
1824         {
1825           /* Additional epilogue iteration is peeled if gap exists.  */
1826           bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1827           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1828                                                 bound_prolog,
1829                                                 peel_for_gaps ? vf : vf - 1,
1830                                                 th, &bound_scalar,
1831                                                 check_profitability);
1832           /* Build guard against NITERSM1 since NITERS may overflow.  */
1833           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1834           guard_bb = anchor;
1835           guard_to = split_edge (loop_preheader_edge (epilog));
1836           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1837                                            guard_to, guard_bb,
1838                                            prob_vector.invert (),
1839                                            irred_flag);
1840           e = EDGE_PRED (guard_to, 0);
1841           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1842           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1843
1844           /* Simply propagate profile info from guard_bb to guard_to which is
1845              a merge point of control flow.  */
1846           guard_to->count = guard_bb->count;
1847           /* Scale probability of epilog loop back.
1848              FIXME: We should avoid scaling down and back up.  Profile may
1849              get lost if we scale down to 0.  */
1850           int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
1851                          / prob_vector.to_reg_br_prob_base ();
1852           basic_block *bbs = get_loop_body (epilog);
1853           scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
1854                                      REG_BR_PROB_BASE);
1855           free (bbs);
1856         }
1857
1858       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1859       tree niters_vector_mult_vf;
1860       /* If loop is peeled for non-zero constant times, now niters refers to
1861          orig_niters - prolog_peeling, it won't overflow even the orig_niters
1862          overflows.  */
1863       niters_no_overflow |= (prolog_peeling > 0);
1864       vect_gen_vector_loop_niters (loop_vinfo, niters,
1865                                    niters_vector, niters_no_overflow);
1866       vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1867                                            &niters_vector_mult_vf);
1868       /* Update IVs of original loop as if they were advanced by
1869          niters_vector_mult_vf steps.  */
1870       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1871       edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1872       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1873                                         update_e);
1874
1875       if (skip_epilog)
1876         {
1877           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1878                                     niters, niters_vector_mult_vf);
1879           guard_bb = single_exit (loop)->dest;
1880           guard_to = split_edge (single_exit (epilog));
1881           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1882                                            skip_vector ? anchor : guard_bb,
1883                                            prob_epilog.invert (),
1884                                            irred_flag);
1885           slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1886                                               single_exit (epilog));
1887           /* Only need to handle basic block before epilog loop if it's not
1888              the guard_bb, which is the case when skip_vector is true.  */
1889           if (guard_bb != bb_before_epilog)
1890             {
1891               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
1892
1893               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
1894             }
1895           scale_loop_profile (epilog, prob_epilog, bound);
1896         }
1897       else
1898         slpeel_update_phi_nodes_for_lcssa (epilog);
1899
1900       bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1901       /* We share epilog loop with scalar version loop.  */
1902       bound = MAX (bound, bound_scalar - 1);
1903       record_niter_bound (epilog, bound, false, true);
1904
1905       delete_update_ssa ();
1906       adjust_vec_debug_stmts ();
1907       scev_reset ();
1908     }
1909   adjust_vec.release ();
1910   free_original_copy_tables ();
1911
1912   return epilog;
1913 }
1914
1915 /* Function vect_create_cond_for_niters_checks.
1916
1917    Create a conditional expression that represents the run-time checks for
1918    loop's niter.  The loop is guaranteed to to terminate if the run-time
1919    checks hold.
1920
1921    Input:
1922    COND_EXPR  - input conditional expression.  New conditions will be chained
1923                 with logical AND operation.  If it is NULL, then the function
1924                 is used to return the number of alias checks.
1925    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1926                 to be checked.
1927
1928    Output:
1929    COND_EXPR - conditional expression.
1930
1931    The returned COND_EXPR is the conditional expression to be used in the
1932    if statement that controls which version of the loop gets executed at
1933    runtime.  */
1934
1935 static void
1936 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1937 {
1938   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1939
1940   if (*cond_expr)
1941     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1942                               *cond_expr, part_cond_expr);
1943   else
1944     *cond_expr = part_cond_expr;
1945 }
1946
1947 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
1948    and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
1949
1950 static void
1951 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
1952 {
1953   if (*cond_expr)
1954     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1955                               *cond_expr, part_cond_expr);
1956   else
1957     *cond_expr = part_cond_expr;
1958 }
1959
1960 /* Function vect_create_cond_for_align_checks.
1961
1962    Create a conditional expression that represents the alignment checks for
1963    all of data references (array element references) whose alignment must be
1964    checked at runtime.
1965
1966    Input:
1967    COND_EXPR  - input conditional expression.  New conditions will be chained
1968                 with logical AND operation.
1969    LOOP_VINFO - two fields of the loop information are used.
1970                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1971                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1972
1973    Output:
1974    COND_EXPR_STMT_LIST - statements needed to construct the conditional
1975                          expression.
1976    The returned value is the conditional expression to be used in the if
1977    statement that controls which version of the loop gets executed at runtime.
1978
1979    The algorithm makes two assumptions:
1980      1) The number of bytes "n" in a vector is a power of 2.
1981      2) An address "a" is aligned if a%n is zero and that this
1982         test can be done as a&(n-1) == 0.  For example, for 16
1983         byte vectors the test is a&0xf == 0.  */
1984
1985 static void
1986 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1987                                    tree *cond_expr,
1988                                    gimple_seq *cond_expr_stmt_list)
1989 {
1990   vec<gimple *> may_misalign_stmts
1991     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1992   gimple *ref_stmt;
1993   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1994   tree mask_cst;
1995   unsigned int i;
1996   tree int_ptrsize_type;
1997   char tmp_name[20];
1998   tree or_tmp_name = NULL_TREE;
1999   tree and_tmp_name;
2000   gimple *and_stmt;
2001   tree ptrsize_zero;
2002   tree part_cond_expr;
2003
2004   /* Check that mask is one less than a power of 2, i.e., mask is
2005      all zeros followed by all ones.  */
2006   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2007
2008   int_ptrsize_type = signed_type_for (ptr_type_node);
2009
2010   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2011      of the first vector of the i'th data reference. */
2012
2013   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2014     {
2015       gimple_seq new_stmt_list = NULL;
2016       tree addr_base;
2017       tree addr_tmp_name;
2018       tree new_or_tmp_name;
2019       gimple *addr_stmt, *or_stmt;
2020       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2021       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2022       bool negative = tree_int_cst_compare
2023         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2024       tree offset = negative
2025         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2026
2027       /* create: addr_tmp = (int)(address_of_first_vector) */
2028       addr_base =
2029         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2030                                               offset);
2031       if (new_stmt_list != NULL)
2032         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2033
2034       sprintf (tmp_name, "addr2int%d", i);
2035       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2036       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2037       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2038
2039       /* The addresses are OR together.  */
2040
2041       if (or_tmp_name != NULL_TREE)
2042         {
2043           /* create: or_tmp = or_tmp | addr_tmp */
2044           sprintf (tmp_name, "orptrs%d", i);
2045           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2046           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2047                                          or_tmp_name, addr_tmp_name);
2048           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2049           or_tmp_name = new_or_tmp_name;
2050         }
2051       else
2052         or_tmp_name = addr_tmp_name;
2053
2054     } /* end for i */
2055
2056   mask_cst = build_int_cst (int_ptrsize_type, mask);
2057
2058   /* create: and_tmp = or_tmp & mask  */
2059   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2060
2061   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2062                                   or_tmp_name, mask_cst);
2063   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2064
2065   /* Make and_tmp the left operand of the conditional test against zero.
2066      if and_tmp has a nonzero bit then some address is unaligned.  */
2067   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2068   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2069                                 and_tmp_name, ptrsize_zero);
2070   chain_cond_expr (cond_expr, part_cond_expr);
2071 }
2072
2073 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2074    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2075    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2076    and this new condition are true.  Treat a null *COND_EXPR as "true".  */
2077
2078 static void
2079 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2080 {
2081   vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2082   unsigned int i;
2083   vec_object_pair *pair;
2084   FOR_EACH_VEC_ELT (pairs, i, pair)
2085     {
2086       tree addr1 = build_fold_addr_expr (pair->first);
2087       tree addr2 = build_fold_addr_expr (pair->second);
2088       tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2089                                          addr1, addr2);
2090       chain_cond_expr (cond_expr, part_cond_expr);
2091     }
2092 }
2093
2094 /* Function vect_create_cond_for_alias_checks.
2095
2096    Create a conditional expression that represents the run-time checks for
2097    overlapping of address ranges represented by a list of data references
2098    relations passed as input.
2099
2100    Input:
2101    COND_EXPR  - input conditional expression.  New conditions will be chained
2102                 with logical AND operation.  If it is NULL, then the function
2103                 is used to return the number of alias checks.
2104    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2105                 to be checked.
2106
2107    Output:
2108    COND_EXPR - conditional expression.
2109
2110    The returned COND_EXPR is the conditional expression to be used in the if
2111    statement that controls which version of the loop gets executed at runtime.
2112 */
2113
2114 void
2115 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2116 {
2117   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2118     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2119
2120   if (comp_alias_ddrs.is_empty ())
2121     return;
2122
2123   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2124                                &comp_alias_ddrs, cond_expr);
2125   if (dump_enabled_p ())
2126     dump_printf_loc (MSG_NOTE, vect_location,
2127                      "created %u versioning for alias checks.\n",
2128                      comp_alias_ddrs.length ());
2129 }
2130
2131
2132 /* Function vect_loop_versioning.
2133
2134    If the loop has data references that may or may not be aligned or/and
2135    has data reference relations whose independence was not proven then
2136    two versions of the loop need to be generated, one which is vectorized
2137    and one which isn't.  A test is then generated to control which of the
2138    loops is executed.  The test checks for the alignment of all of the
2139    data references that may or may not be aligned.  An additional
2140    sequence of runtime tests is generated for each pairs of DDRs whose
2141    independence was not proven.  The vectorized version of loop is
2142    executed only if both alias and alignment tests are passed.
2143
2144    The test generated to check which version of loop is executed
2145    is modified to also check for profitability as indicated by the
2146    cost model threshold TH.
2147
2148    The versioning precondition(s) are placed in *COND_EXPR and
2149    *COND_EXPR_STMT_LIST.  */
2150
2151 void
2152 vect_loop_versioning (loop_vec_info loop_vinfo,
2153                       unsigned int th, bool check_profitability)
2154 {
2155   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2156   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2157   basic_block condition_bb;
2158   gphi_iterator gsi;
2159   gimple_stmt_iterator cond_exp_gsi;
2160   basic_block merge_bb;
2161   basic_block new_exit_bb;
2162   edge new_exit_e, e;
2163   gphi *orig_phi, *new_phi;
2164   tree cond_expr = NULL_TREE;
2165   gimple_seq cond_expr_stmt_list = NULL;
2166   tree arg;
2167   profile_probability prob = profile_probability::likely ();
2168   gimple_seq gimplify_stmt_list = NULL;
2169   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2170   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2171   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2172   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2173
2174   if (check_profitability)
2175     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2176                              build_int_cst (TREE_TYPE (scalar_loop_iters),
2177                                             th - 1));
2178
2179   if (version_niter)
2180     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2181
2182   if (cond_expr)
2183     cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2184                                         is_gimple_condexpr, NULL_TREE);
2185
2186   if (version_align)
2187     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2188                                        &cond_expr_stmt_list);
2189
2190   if (version_alias)
2191     {
2192       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2193       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2194     }
2195
2196   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2197                                       is_gimple_condexpr, NULL_TREE);
2198   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2199
2200   initialize_original_copy_tables ();
2201   if (scalar_loop)
2202     {
2203       edge scalar_e;
2204       basic_block preheader, scalar_preheader;
2205
2206       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2207          scale LOOP's frequencies instead.  */
2208       nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2209                             prob, prob.invert (), prob, prob.invert (), true);
2210       scale_loop_frequencies (loop, prob);
2211       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2212          while we need to move it above LOOP's preheader.  */
2213       e = loop_preheader_edge (loop);
2214       scalar_e = loop_preheader_edge (scalar_loop);
2215       gcc_assert (empty_block_p (e->src)
2216                   && single_pred_p (e->src));
2217       gcc_assert (empty_block_p (scalar_e->src)
2218                   && single_pred_p (scalar_e->src));
2219       gcc_assert (single_pred_p (condition_bb));
2220       preheader = e->src;
2221       scalar_preheader = scalar_e->src;
2222       scalar_e = find_edge (condition_bb, scalar_preheader);
2223       e = single_pred_edge (preheader);
2224       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2225                                       scalar_preheader);
2226       redirect_edge_and_branch_force (scalar_e, preheader);
2227       redirect_edge_and_branch_force (e, condition_bb);
2228       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2229                                single_pred (condition_bb));
2230       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2231                                single_pred (scalar_preheader));
2232       set_immediate_dominator (CDI_DOMINATORS, preheader,
2233                                condition_bb);
2234     }
2235   else
2236     nloop = loop_version (loop, cond_expr, &condition_bb,
2237                           prob, prob.invert (), prob, prob.invert (), true);
2238
2239   if (version_niter)
2240     {
2241       /* The versioned loop could be infinite, we need to clear existing
2242          niter information which is copied from the original loop.  */
2243       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2244       vect_free_loop_info_assumptions (nloop);
2245       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
2246       loop_constraint_set (loop, LOOP_C_INFINITE);
2247     }
2248
2249   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2250       && dump_enabled_p ())
2251     {
2252       if (version_alias)
2253         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2254                          "loop versioned for vectorization because of "
2255                          "possible aliasing\n");
2256       if (version_align)
2257         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2258                          "loop versioned for vectorization to enhance "
2259                          "alignment\n");
2260
2261     }
2262   free_original_copy_tables ();
2263
2264   /* Loop versioning violates an assumption we try to maintain during
2265      vectorization - that the loop exit block has a single predecessor.
2266      After versioning, the exit block of both loop versions is the same
2267      basic block (i.e. it has two predecessors). Just in order to simplify
2268      following transformations in the vectorizer, we fix this situation
2269      here by adding a new (empty) block on the exit-edge of the loop,
2270      with the proper loop-exit phis to maintain loop-closed-form.
2271      If loop versioning wasn't done from loop, but scalar_loop instead,
2272      merge_bb will have already just a single successor.  */
2273
2274   merge_bb = single_exit (loop)->dest;
2275   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2276     {
2277       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2278       new_exit_bb = split_edge (single_exit (loop));
2279       new_exit_e = single_exit (loop);
2280       e = EDGE_SUCC (new_exit_bb, 0);
2281
2282       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2283         {
2284           tree new_res;
2285           orig_phi = gsi.phi ();
2286           new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2287           new_phi = create_phi_node (new_res, new_exit_bb);
2288           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2289           add_phi_arg (new_phi, arg, new_exit_e,
2290                        gimple_phi_arg_location_from_edge (orig_phi, e));
2291           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2292         }
2293     }
2294
2295   /* End loop-exit-fixes after versioning.  */
2296
2297   if (cond_expr_stmt_list)
2298     {
2299       cond_exp_gsi = gsi_last_bb (condition_bb);
2300       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2301                              GSI_SAME_STMT);
2302     }
2303   update_ssa (TODO_update_ssa);
2304 }