/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from the outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
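
/* An illustrative scenario (SSA names invented): suppose x_1 is first
   scheduled for substitution by an outer lcssa result x_7, and later by
   x_9 from a PHI nested inside the region x_7 covers.  Popping the
   stack applies the closer x_9 substitution first, within the blocks it
   dominates, and only then the wider x_7 one.  Processing in FIFO order
   instead could rewrite the inner debug uses to x_7 up front, leaving
   nothing for the x_9 entry to substitute.  */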
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
                                     build_int_cst (TREE_TYPE (niters), 1));
}
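
/* Illustrative sketch only (SSA names invented): for a loop whose exit
   is taken on the FALSE branch, the transformation above yields GIMPLE
   along the lines of

     ivtmp_1 = PHI <0(preheader), ivtmp_2(latch)>
     ...
     ivtmp_2 = ivtmp_1 + 1;
     if (ivtmp_2 < niters_3)
       goto latch;
     else
       goto exit;

   so the loop executes exactly NITERS iterations whatever its original
   exit condition computed.  */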
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest coming from edges
   FROM and TO, ensure that the current_def of each TO->dest PHI
   argument is set to the corresponding value from FROM.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = enter_e->count.apply_probability (probability);
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->count -= new_e->count;
  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}
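
/* Resulting shape, as a sketch (illustrative only):

     guard_bb:
       if (cond)
         goto guard_to;       <- new_e (TRUE edge), probability PROBABILITY
       else
         goto enter_e->dest;  <- enter_e (now the FALSE edge)

   with GUARD_TO's immediate dominator set to DOM_BB, and ENTER_E split
   when its destination is a loop header so preheaders are preserved.  */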
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   need to be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
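
/* For example (illustrative, names invented): i_1 = PHI <0(preheader),
   i_2(latch)> with i_2 = i_1 + step_3 is advanceable when step_3 is
   loop-invariant, whereas an IV whose evolution part is missing, varies
   inside the loop, or is itself a chrec such as {1, +, 2}_1 (making the
   IV a polynomial of degree 2 overall) is rejected by the checks
   above.  */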
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
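
/* Worked example (names invented): for an IV with initial value init_5
   and loop-invariant step 4, this emits ni = init_5 + niters * 4 in the
   loop's exit bb and rewrites the epilog loop's PHI argument on
   UPDATE_E to that value, so the epilog resumes the IV exactly where
   the peeled-off iterations would have left it.  */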
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
        ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
        = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
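
/* Worked example for the unknown-misalignment path (illustrative
   numbers): with target alignment 16 bytes and 4-byte elements,
   align_in_elems is 4.  If the start address is 8 bytes past an
   aligned boundary, misalign_in_elems = 8 >> 2 = 2 and
   iters = (4 - 2) & 3 = 2, i.e. peeling two scalar iterations aligns
   the access; *BOUND is set to 3.  */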
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
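
/* E.g. (illustrative): for a data reference with DR_STEP 4 and 3 peeled
   iterations, DR_OFFSET grows by 4 * 3 = 12 bytes, so DR now describes
   the first access performed by the unpeeled part of the loop.  */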
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
   it to TRUE if a new ssa_var is generated.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        {
          gsi_insert_seq_on_edge_immediate (pe, stmts);
          if (new_var_p != NULL)
            *new_var_p = true;
        }

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred to the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1
   is the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is
   the threshold below which the scalar (rather than vectorized) loop will
   be executed.  This function stores the upper bound (included) of the
   result in BOUND_SCALAR.  */

static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
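
/* Worked example (illustrative numbers): VFM1 = 3, BOUND_PROLOG = 3,
   TH = 10 with CHECK_PROFITABILITY true, so TH becomes 9 after the
   decrement.  A constant prolog count of 2 gives 2 + 3 = 5 < 9, hence
   *BOUND_SCALAR = 9 and the constant 9 is returned; an unknown prolog
   count also yields the constant 9, since 9 >= 3 + 3.  */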
/* This function generates the following statements:

     niters = number of iterations loop executes (after peeling)
     niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */

void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, type = TREE_TYPE (niters);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (type, exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf).  */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, type,
                     fold_build2 (RSHIFT_EXPR, type,
                                  fold_build2 (MINUS_EXPR, type, ni_minus_gap,
                                               build_int_cst (type, vf)),
                                  log_vf),
                     build_int_cst (type, 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL)
        set_range_info (niters_vector, VR_RANGE,
                        wi::to_wide (build_int_cst (type, 1)),
                        wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
                                                  TYPE_MAX_VALUE (type),
                                                  log_vf)));
    }
  *niters_vector_ptr = niters_vector;

  return;
}
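
/* Worked example (illustrative): with VF = 4 and NITERS = 10, the
   no-overflow form computes 10 >> 2 = 2 vector iterations, leaving two
   scalar iterations for the epilog; the other form computes
   ((10 - 4) >> 2) + 1 = 2 as well, but stays correct in the unsigned
   type even when computing NITERS wrapped around.  */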
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
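
/* E.g. (illustrative): with VF = 4 and a vector iteration count of 2,
   this emits niters_vector_mult_vf = 2 << 2 = 8 in the exit bb - the
   number of scalar iterations the vector loop covered.  */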
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
                              ;; or with i_2 if no LCSSA phi is created
                              ;; under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; updates the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute rest iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto epilog_loop;

       ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
1584 /* Function vect_do_peeling.
1586 Input:
1587 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1589 preheader:
1590 LOOP:
1591 header_bb:
1592 loop_body
1593 if (exit_loop_cond) goto exit_bb
1594 else goto header_bb
1595 exit_bb:
1597 - NITERS: The number of iterations of the loop.
1598 - NITERSM1: The number of iterations of the loop's latch.
1599 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1600 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1601 CHECK_PROFITABILITY is true.
1602 Output:
1603 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1605 This function peels prolog and epilog from the loop, adds guards skipping
1606 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1607 would look like:
1609 guard_bb_1:
1610 if (prefer_scalar_loop) goto merge_bb_1
1611 else goto guard_bb_2
1613 guard_bb_2:
1614 if (skip_prolog) goto merge_bb_2
1615 else goto prolog_preheader
1617 prolog_preheader:
1618 PROLOG:
1619 prolog_header_bb:
1620 prolog_body
1621 if (exit_prolog_cond) goto prolog_exit_bb
1622 else goto prolog_header_bb
1623 prolog_exit_bb:
1625 merge_bb_2:
1627 vector_preheader:
1628 VECTOR LOOP:
1629 vector_header_bb:
1630 vector_body
1631 if (exit_vector_cond) goto vector_exit_bb
1632 else goto vector_header_bb
1633 vector_exit_bb:
1635 guard_bb_3:
1636 if (skip_epilog) goto merge_bb_3
1637 else goto epilog_preheader
1639 merge_bb_1:
1641 epilog_preheader:
1642 EPILOG:
1643 epilog_header_bb:
1644 epilog_body
1645 if (exit_epilog_cond) goto merge_bb_3
1646 else goto epilog_header_bb
1648 merge_bb_3:
1650 Note this function peels prolog and epilog only if it's necessary,
1651 as well as guards.
1652 Returns created epilogue or NULL.
1654 TODO: Guard for prefer_scalar_loop should be emitted along with
1655 versioning conditions if loop versioning is needed. */
1658 struct loop *
1659 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1660 tree *niters_vector, int th, bool check_profitability,
1661 bool niters_no_overflow)
1663 edge e, guard_e;
1664 tree type = TREE_TYPE (niters), guard_cond;
1665 basic_block guard_bb, guard_to;
1666 profile_probability prob_prolog, prob_vector, prob_epilog;
1667 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1668 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1669 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1670 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1671 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1673 if (!prolog_peeling && !epilog_peeling)
1674 return NULL;
1676 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
1677 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1678 vf = 3;
1679 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1680 .apply_scale (vf - 1, vf);
1681 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1683 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1684 struct loop *first_loop = loop;
1685 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1686 create_lcssa_for_virtual_phi (loop);
1687 update_ssa (TODO_update_ssa_only_virtuals);
1689 if (MAY_HAVE_DEBUG_STMTS)
1691 gcc_assert (!adjust_vec.exists ());
1692 adjust_vec.create (32);
1694 initialize_original_copy_tables ();
1696 /* Prolog loop may be skipped. */
1697 bool skip_prolog = (prolog_peeling != 0);
1698 /* Skip to epilog if scalar loop may be preferred. It's only needed
1699 when we peel for epilog loop and when it hasn't been checked with
1700 loop versioning. */
1701 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1702 && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1703 /* Epilog loop must be executed if the number of iterations for epilog
1704 loop is known at compile time, otherwise we need to add a check at
1705 the end of vector loop and skip to the end of epilog loop. */
1706 bool skip_epilog = (prolog_peeling < 0
1707 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1708 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1709 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1710 skip_epilog = false;
1712 /* Record the anchor bb at which guard should be placed if scalar loop
1713 may be preferred. */
1714 basic_block anchor = loop_preheader_edge (loop)->src;
1715 if (skip_vector)
1717 split_edge (loop_preheader_edge (loop));
1719 /* Due to the order in which we peel prolog and epilog, we first
1720 propagate probability to the whole loop. The purpose is to
1721 avoid adjusting probabilities of both prolog and vector loops
1722 separately. Note in this case, the probability of epilog loop
1723 needs to be scaled back later. */
1724 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1725 if (prob_vector.initialized_p ())
1726 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1727 scale_loop_profile (loop, prob_vector, bound);
1730 tree niters_prolog = build_int_cst (type, 0);
1731 source_location loop_loc = find_loop_location (loop);
1732 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1733 if (prolog_peeling)
1735 e = loop_preheader_edge (loop);
1736 if (!slpeel_can_duplicate_loop_p (loop, e))
1738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1739 "loop can't be duplicated to preheader edge.\n");
1740 gcc_unreachable ();
1742 /* Peel prolog and put it on preheader edge of loop. */
1743 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1744 if (!prolog)
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1747 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1748 gcc_unreachable ();
1750 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1751 first_loop = prolog;
1752 reset_original_copy_tables ();
1754 /* Generate and update the number of iterations for prolog loop. */
1755 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1756 &bound_prolog);
1757 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1759 /* Skip the prolog loop. */
1760 if (skip_prolog)
1762 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1763 niters_prolog, build_int_cst (type, 0));
1764 guard_bb = loop_preheader_edge (prolog)->src;
1765 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1766 guard_to = split_edge (loop_preheader_edge (loop));
1767 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1768 guard_to, guard_bb,
1769 prob_prolog.invert (),
1770 irred_flag);
1771 e = EDGE_PRED (guard_to, 0);
1772 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1773 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1775 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1776 scale_loop_profile (prolog, prob_prolog, bound_prolog);
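/* At this point the guard branches straight to the vector loop
   whenever NITERS_PROLOG == 0, i.e. whenever the runtime alignment
   computation finds that no prolog iterations are needed.  */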
1778 /* Update init address of DRs. */
1779 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1780 /* Update niters for vector loop. */
1781 LOOP_VINFO_NITERS (loop_vinfo)
1782 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1783 LOOP_VINFO_NITERSM1 (loop_vinfo)
1784 = fold_build2 (MINUS_EXPR, type,
1785 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1786 bool new_var_p = false;
1787 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1788 /* It's guaranteed that the vector loop bound before vectorization is at
1789 least VF, so set range information for the newly generated var. */
1790 if (new_var_p)
1791 set_range_info (niters, VR_RANGE,
1792 wi::to_wide (build_int_cst (type, vf)),
1793 wi::to_wide (TYPE_MAX_VALUE (type)));
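/* For example, peeling 3 prolog iterations off an original count of
   103 leaves the vector loop with NITERS == 100; with VF == 4 and a
   32-bit unsigned niter type the recorded range is [4, 4294967295].  */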
1795 /* The prolog iterates at most bound_prolog times, so its latch iterates
1796 at most bound_prolog - 1 times. */
1797 record_niter_bound (prolog, bound_prolog - 1, false, true);
1798 delete_update_ssa ();
1799 adjust_vec_debug_stmts ();
1800 scev_reset ();
1803 if (epilog_peeling)
1805 e = single_exit (loop);
1806 if (!slpeel_can_duplicate_loop_p (loop, e))
1808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1809 "loop can't be duplicated to exit edge.\n");
1810 gcc_unreachable ();
1812 /* Peel epilog and put it on exit edge of loop. */
1813 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1814 if (!epilog)
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1817 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1818 gcc_unreachable ();
1820 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1822 /* The scalar version of the loop may be preferred.  In this case, add
1823 the guard and skip to the epilog.  Note this only happens when the
1824 number of iterations of the loop is unknown at compile time;
1825 otherwise the loop wouldn't have been vectorized. */
1826 if (skip_vector)
1828 /* An additional epilogue iteration is peeled if a gap exists. */
1829 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1830 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1831 bound_prolog,
1832 peel_for_gaps ? vf : vf - 1,
1833 th, &bound_scalar,
1834 check_profitability);
1835 /* Build guard against NITERSM1 since NITERS may overflow. */
1836 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
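/* Guarding on NITERSM1 matters when the original niter expression
   wraps: e.g. for a 32-bit unsigned type and 2^32 iterations, NITERS
   is 0 while NITERSM1 == 0xffffffff still compares correctly with T.  */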
1837 guard_bb = anchor;
1838 guard_to = split_edge (loop_preheader_edge (epilog));
1839 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1840 guard_to, guard_bb,
1841 prob_vector.invert (),
1842 irred_flag);
1843 e = EDGE_PRED (guard_to, 0);
1844 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1845 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1847 /* Simply propagate profile info from guard_bb to guard_to, which is
1848 a merge point of control flow. */
1849 guard_to->frequency = guard_bb->frequency;
1850 guard_to->count = guard_bb->count;
1851 single_succ_edge (guard_to)->count = guard_to->count;
1852 /* Scale the probability of the epilog loop back.
1853 FIXME: We should avoid scaling down and back up.  Profile may
1854 get lost if we scale down to 0. */
1855 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
1856 / prob_vector.to_reg_br_prob_base ();
1857 basic_block *bbs = get_loop_body (epilog);
1858 scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
1859 REG_BR_PROB_BASE);
1860 free (bbs);
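/* E.g. with PROB_VECTOR == 9/10 the epilog was scaled down by 9/10
   along with the rest of the loop above, so SCALE_UP works out to
   REG_BR_PROB_BASE * 10 / 9 and the frequencies are multiplied by
   10/9 here, restoring them modulo the rounding noted in the FIXME.  */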
1863 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1864 tree niters_vector_mult_vf;
1865 /* If the loop is peeled a non-zero constant number of times, NITERS now
1866 refers to orig_niters - prolog_peeling; it won't overflow even if
1867 orig_niters overflows. */
1868 niters_no_overflow |= (prolog_peeling > 0);
1869 vect_gen_vector_loop_niters (loop_vinfo, niters,
1870 niters_vector, niters_no_overflow);
1871 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1872 &niters_vector_mult_vf);
1873 /* Update IVs of original loop as if they were advanced by
1874 niters_vector_mult_vf steps. */
1875 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1876 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1877 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1878 update_e);
1880 if (skip_epilog)
1882 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1883 niters, niters_vector_mult_vf);
1884 guard_bb = single_exit (loop)->dest;
1885 guard_to = split_edge (single_exit (epilog));
1886 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1887 skip_vector ? anchor : guard_bb,
1888 prob_epilog.invert (),
1889 irred_flag);
1890 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1891 single_exit (epilog));
1892 /* We only need to handle the basic block before the epilog loop if
1893 it isn't the guard_bb, which is the case when skip_vector is true. */
1894 if (guard_bb != bb_before_epilog)
1896 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
1898 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
1900 scale_loop_profile (epilog, prob_epilog, bound);
1902 else
1903 slpeel_update_phi_nodes_for_lcssa (epilog);
1905 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1906 /* We share the epilog loop with the scalar version of the loop. */
1907 bound = MAX (bound, bound_scalar - 1);
1908 record_niter_bound (epilog, bound, false, true);
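/* E.g. with VF == 4 and no peeling for gaps the epilog handles fewer
   than 4 iterations, so its latch runs at most VF - 2 == 2 times;
   peeling for gaps allows one more iteration, hence VF - 1.  Taking
   the maximum with BOUND_SCALAR - 1 accounts for the shared scalar
   version of the loop.  */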
1910 delete_update_ssa ();
1911 adjust_vec_debug_stmts ();
1912 scev_reset ();
1914 adjust_vec.release ();
1915 free_original_copy_tables ();
1917 return epilog;
1920 /* Function vect_create_cond_for_niters_checks.
1922 Create a conditional expression that represents the run-time checks for
1923 the loop's niter.  The loop is guaranteed to terminate if the run-time
1924 checks hold.
1926 Input:
1927 COND_EXPR - input conditional expression.  The new condition will be
1928 chained with it by a logical AND operation.  If it is NULL,
1929 the new condition is used on its own.
1930 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
1931 under which the loop's niter analysis holds.
1933 Output:
1934 COND_EXPR - conditional expression.
1936 The resulting COND_EXPR is the conditional expression to be used in the
1937 if statement that controls which version of the loop gets executed at
1938 runtime. */
1940 static void
1941 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1943 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1945 if (*cond_expr)
1946 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1947 *cond_expr, part_cond_expr);
1948 else
1949 *cond_expr = part_cond_expr;
1952 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
1953 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
1955 static void
1956 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
1958 if (*cond_expr)
1959 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1960 *cond_expr, part_cond_expr);
1961 else
1962 *cond_expr = part_cond_expr;
1965 /* Function vect_create_cond_for_align_checks.
1967 Create a conditional expression that represents the alignment checks for
1968 all of the data references (array element references) whose alignment
1969 must be checked at runtime.
1971 Input:
1972 COND_EXPR - input conditional expression. New conditions will be chained
1973 with logical AND operation.
1974 LOOP_VINFO - two fields of the loop information are used.
1975 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1976 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1978 Output:
1979 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1980 expression.
1981 The returned value is the conditional expression to be used in the if
1982 statement that controls which version of the loop gets executed at runtime.
1984 The algorithm makes two assumptions:
1985 1) The number of bytes "n" in a vector is a power of 2.
1986 2) An address "a" is aligned if a%n is zero, and this
1987 test can be done as a&(n-1) == 0.  For example, for 16
1988 byte vectors the test is a&0xf == 0. */
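/* As an illustration (the names match the temporaries created below),
   for two data references whose first-vector addresses are A and B,
   and 16-byte vectors, the emitted sequence is conceptually:

     addr2int0 = (signed ptrsize type) A;
     addr2int1 = (signed ptrsize type) B;
     orptrs1 = addr2int0 | addr2int1;
     andmask = orptrs1 & 0xf;

   and the condition contributed to *COND_EXPR is andmask == 0.  */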
1990 static void
1991 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1992 tree *cond_expr,
1993 gimple_seq *cond_expr_stmt_list)
1995 vec<gimple *> may_misalign_stmts
1996 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1997 gimple *ref_stmt;
1998 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1999 tree mask_cst;
2000 unsigned int i;
2001 tree int_ptrsize_type;
2002 char tmp_name[20];
2003 tree or_tmp_name = NULL_TREE;
2004 tree and_tmp_name;
2005 gimple *and_stmt;
2006 tree ptrsize_zero;
2007 tree part_cond_expr;
2009 /* Check that mask is one less than a power of 2, i.e., mask is
2010 all zeros followed by all ones. */
2011 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
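/* E.g. MASK == 0xf passes the check (0xf & 0x10 == 0), whereas
   MASK == 0xe would fail (0xe & 0xf != 0).  */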
2013 int_ptrsize_type = signed_type_for (ptr_type_node);
2015 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2016 of the first vector of the i'th data reference. */
2018 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2020 gimple_seq new_stmt_list = NULL;
2021 tree addr_base;
2022 tree addr_tmp_name;
2023 tree new_or_tmp_name;
2024 gimple *addr_stmt, *or_stmt;
2025 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2026 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2027 bool negative = tree_int_cst_compare
2028 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2029 tree offset = negative
2030 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
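/* For a negative-step access the first vector starts
   TYPE_VECTOR_SUBPARTS - 1 elements before the initial address
   (e.g. an offset of -3 for 4-element vectors), so that lowest
   address is the one whose alignment gets tested.  */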
2032 /* create: addr_tmp = (int)(address_of_first_vector) */
2033 addr_base =
2034 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2035 offset);
2036 if (new_stmt_list != NULL)
2037 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2039 sprintf (tmp_name, "addr2int%d", i);
2040 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2041 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2042 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2044 /* The addresses are ORed together. */
2046 if (or_tmp_name != NULL_TREE)
2048 /* create: or_tmp = or_tmp | addr_tmp */
2049 sprintf (tmp_name, "orptrs%d", i);
2050 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2051 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2052 or_tmp_name, addr_tmp_name);
2053 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2054 or_tmp_name = new_or_tmp_name;
2056 else
2057 or_tmp_name = addr_tmp_name;
2059 } /* end for i */
2061 mask_cst = build_int_cst (int_ptrsize_type, mask);
2063 /* create: and_tmp = or_tmp & mask */
2064 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2066 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2067 or_tmp_name, mask_cst);
2068 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2070 /* Make and_tmp the left operand of the conditional test against zero.
2071 If and_tmp has a nonzero bit then some address is unaligned. */
2072 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2073 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2074 and_tmp_name, ptrsize_zero);
2075 chain_cond_expr (cond_expr, part_cond_expr);
2078 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2079 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2080 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2081 and this new condition are true. Treat a null *COND_EXPR as "true". */
2083 static void
2084 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2086 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2087 unsigned int i;
2088 vec_object_pair *pair;
2089 FOR_EACH_VEC_ELT (pairs, i, pair)
2091 tree addr1 = build_fold_addr_expr (pair->first);
2092 tree addr2 = build_fold_addr_expr (pair->second);
2093 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2094 addr1, addr2);
2095 chain_cond_expr (cond_expr, part_cond_expr);
2099 /* Function vect_create_cond_for_alias_checks.
2101 Create a conditional expression that represents the run-time checks for
2102 overlapping of the address ranges represented by a list of data
2103 reference relations passed as input.
2105 Input:
2106 COND_EXPR - input conditional expression.  New conditions will be chained
2107 with logical AND operation.  If it is NULL, the new
2108 conditions are used on their own.
2109 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2110 to be checked.
2112 Output:
2113 COND_EXPR - conditional expression.
2115 The returned COND_EXPR is the conditional expression to be used in the if
2116 statement that controls which version of the loop gets executed at runtime.
2119 void
2120 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2122 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2123 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2125 if (comp_alias_ddrs.is_empty ())
2126 return;
2128 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2129 &comp_alias_ddrs, cond_expr);
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_NOTE, vect_location,
2132 "created %u versioning for alias checks.\n",
2133 comp_alias_ddrs.length ());
2137 /* Function vect_loop_versioning.
2139 If the loop has data references that may or may not be aligned and/or
2140 has data reference relations whose independence was not proven, then
2141 two versions of the loop need to be generated, one which is vectorized
2142 and one which isn't.  A test is then generated to control which of the
2143 loops is executed.  The test checks for the alignment of all of the
2144 data references that may or may not be aligned.  An additional
2145 sequence of runtime tests is generated for each pair of DDRs whose
2146 independence was not proven.  The vectorized version of the loop is
2147 executed only if both the alias and alignment tests pass.
2149 The test generated to check which version of the loop is executed
2150 is modified to also check for profitability, as indicated by the
2151 cost model threshold TH.
2153 The versioning precondition(s) are placed in *COND_EXPR and
2154 *COND_EXPR_STMT_LIST. */
2156 void
2157 vect_loop_versioning (loop_vec_info loop_vinfo,
2158 unsigned int th, bool check_profitability)
2160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2161 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2162 basic_block condition_bb;
2163 gphi_iterator gsi;
2164 gimple_stmt_iterator cond_exp_gsi;
2165 basic_block merge_bb;
2166 basic_block new_exit_bb;
2167 edge new_exit_e, e;
2168 gphi *orig_phi, *new_phi;
2169 tree cond_expr = NULL_TREE;
2170 gimple_seq cond_expr_stmt_list = NULL;
2171 tree arg;
2172 profile_probability prob = profile_probability::likely ();
2173 gimple_seq gimplify_stmt_list = NULL;
2174 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2175 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2176 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2177 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2179 if (check_profitability)
2180 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2181 build_int_cst (TREE_TYPE (scalar_loop_iters),
2182 th - 1));
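/* Since SCALAR_LOOP_ITERS is NITERSM1, testing NITERSM1 >= TH - 1 is
   equivalent to requiring at least TH iterations, while staying safe
   in the case where NITERS itself would wrap around to 0.  */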
2184 if (version_niter)
2185 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2187 if (cond_expr)
2188 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2189 is_gimple_condexpr, NULL_TREE);
2191 if (version_align)
2192 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2193 &cond_expr_stmt_list);
2195 if (version_alias)
2197 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
2198 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2201 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2202 is_gimple_condexpr, NULL_TREE);
2203 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2205 initialize_original_copy_tables ();
2206 if (scalar_loop)
2208 edge scalar_e;
2209 basic_block preheader, scalar_preheader;
2211 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2212 scale LOOP's frequencies instead. */
2213 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2214 prob, prob.invert (), prob, prob.invert (), true);
2215 scale_loop_frequencies (loop, prob);
2216 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2217 while we need to move it above LOOP's preheader. */
2218 e = loop_preheader_edge (loop);
2219 scalar_e = loop_preheader_edge (scalar_loop);
2220 gcc_assert (empty_block_p (e->src)
2221 && single_pred_p (e->src));
2222 gcc_assert (empty_block_p (scalar_e->src)
2223 && single_pred_p (scalar_e->src));
2224 gcc_assert (single_pred_p (condition_bb));
2225 preheader = e->src;
2226 scalar_preheader = scalar_e->src;
2227 scalar_e = find_edge (condition_bb, scalar_preheader);
2228 e = single_pred_edge (preheader);
2229 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2230 scalar_preheader);
2231 redirect_edge_and_branch_force (scalar_e, preheader);
2232 redirect_edge_and_branch_force (e, condition_bb);
2233 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2234 single_pred (condition_bb));
2235 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2236 single_pred (scalar_preheader));
2237 set_immediate_dominator (CDI_DOMINATORS, preheader,
2238 condition_bb);
2240 else
2241 nloop = loop_version (loop, cond_expr, &condition_bb,
2242 prob, prob.invert (), prob, prob.invert (), true);
2244 if (version_niter)
2246 /* The versioned loop could be infinite; we need to clear the existing
2247 niter information, which was copied from the original loop. */
2248 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2249 vect_free_loop_info_assumptions (nloop);
2250 /* And set the constraint LOOP_C_INFINITE for the niter analyzer. */
2251 loop_constraint_set (loop, LOOP_C_INFINITE);
2254 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2255 && dump_enabled_p ())
2257 if (version_alias)
2258 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2259 "loop versioned for vectorization because of "
2260 "possible aliasing\n");
2261 if (version_align)
2262 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2263 "loop versioned for vectorization to enhance "
2264 "alignment\n");
2267 free_original_copy_tables ();
2269 /* Loop versioning violates an assumption we try to maintain during
2270 vectorization - that the loop exit block has a single predecessor.
2271 After versioning, the exit block of both loop versions is the same
2272 basic block (i.e. it has two predecessors).  To simplify the
2273 following transformations in the vectorizer, we fix this situation
2274 here by adding a new (empty) block on the exit-edge of the loop,
2275 with the proper loop-exit phis to maintain loop-closed-form.
2276 If loop versioning wasn't done from LOOP, but from SCALAR_LOOP instead,
2277 MERGE_BB will already have just a single predecessor. */
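/* Conceptually:

        LOOP    NLOOP                 LOOP      NLOOP
           \    /          ==>          |         |
          merge_bb                 new_exit_bb    |
                                          \       |
                                           merge_bb

   with NEW_EXIT_BB taking over the loop-exit PHIs of MERGE_BB.  */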
2279 merge_bb = single_exit (loop)->dest;
2280 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2282 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2283 new_exit_bb = split_edge (single_exit (loop));
2284 new_exit_e = single_exit (loop);
2285 e = EDGE_SUCC (new_exit_bb, 0);
2287 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2289 tree new_res;
2290 orig_phi = gsi.phi ();
2291 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2292 new_phi = create_phi_node (new_res, new_exit_bb);
2293 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2294 add_phi_arg (new_phi, arg, new_exit_e,
2295 gimple_phi_arg_location_from_edge (orig_phi, e));
2296 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2300 /* End loop-exit-fixes after versioning. */
2302 if (cond_expr_stmt_list)
2304 cond_exp_gsi = gsi_last_bb (condition_bb);
2305 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2306 GSI_SAME_STMT);
2308 update_ssa (TODO_update_ssa);