gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "cfghooks.h"
  29 #include "tree-pass.h"
  30 #include "ssa.h"
  31 #include "fold-const.h"
  32 #include "cfganal.h"
  33 #include "gimplify.h"
  34 #include "gimple-iterator.h"
  35 #include "gimplify-me.h"
  36 #include "tree-cfg.h"
  37 #include "tree-ssa-loop-manip.h"
  38 #include "tree-into-ssa.h"
  39 #include "tree-ssa.h"
  40 #include "cfgloop.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "tree-vectorizer.h"
  43 #include "tree-ssa-loop-ivopts.h"
  44
  45 /*************************************************************************
  46   Simple Loop Peeling Utilities
  47
  48   Utilities to support loop peeling for vectorization purposes.
  49  *************************************************************************/
  50
  51
  52 /* Renames the use *OP_P.  */
  53
  54 static void
  55 rename_use_op (use_operand_p op_p)
  56 {
  57   tree new_name;
  58
  59   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  60     return;
  61
  62   new_name = get_current_def (USE_FROM_PTR (op_p));
  63
  64   /* Something defined outside of the loop.  */
  65   if (!new_name)
  66     return;
  67
  68   /* An ordinary ssa name defined in the loop.  */
  69
  70   SET_USE (op_p, new_name);
  71 }
  72
  73
  74 /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
  75    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
  76    true.  */
  77
  78 static void
  79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
  80 {
  81   gimple *stmt;
  82   use_operand_p use_p;
  83   ssa_op_iter iter;
  84   edge e;
  85   edge_iterator ei;
  86   struct loop *loop = bb->loop_father;
  87   struct loop *outer_loop = NULL;
  88
  89   if (rename_from_outer_loop)
  90     {
  91       gcc_assert (loop);
  92       outer_loop = loop_outer (loop);
  93     }
  94
  95   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
  96        gsi_next (&gsi))
  97     {
  98       stmt = gsi_stmt (gsi);
  99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 100         rename_use_op (use_p);
 101     }
 102
 103   FOR_EACH_EDGE (e, ei, bb->preds)
 104     {
 105       if (!flow_bb_inside_loop_p (loop, e->src))
 106         {
 107           if (!rename_from_outer_loop)
 108             continue;
 109           if (e->src != outer_loop->header)
 110             {
 111               if (outer_loop->inner->next)
 112                 {
 113                   /* If outer_loop has 2 inner loops, allow there to
 114                      be an extra basic block which decides which of the
 115                      two loops to use using LOOP_VECTORIZED.  */
 116                   if (!single_pred_p (e->src)
 117                       || single_pred (e->src) != outer_loop->header)
 118                     continue;
 119                 }
 120               else
 121                 continue;
 122             }
 123         }
 124       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 125            gsi_next (&gsi))
 126         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 127     }
 128 }
 129
 130
 131 struct adjust_info
 132 {
 133   tree from, to;
 134   basic_block bb;
 135 };
 136
 137 /* A stack of values to be adjusted in debug stmts.  We have to
 138    process them LIFO, so that the closest substitution applies.  If we
 139    processed them FIFO, without the stack, we might substitute uses
 140    with a PHI DEF that would soon become non-dominant, and when we got
 141    to the suitable one, it wouldn't have anything to substitute any
 142    more.  */
 143 static vec<adjust_info, va_heap> adjust_vec;
 144
 145 /* Adjust any debug stmts that referenced AI->from values to use the
 146    loop-closed AI->to, if the references are dominated by AI->bb and
 147    not by the definition of AI->from.  */
 148
 149 static void
 150 adjust_debug_stmts_now (adjust_info *ai)
 151 {
 152   basic_block bbphi = ai->bb;
 153   tree orig_def = ai->from;
 154   tree new_def = ai->to;
 155   imm_use_iterator imm_iter;
 156   gimple *stmt;
 157   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 158
 159   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 160
 161   /* Adjust any debug stmts that held onto non-loop-closed
 162      references.  */
 163   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 164     {
 165       use_operand_p use_p;
 166       basic_block bbuse;
 167
 168       if (!is_gimple_debug (stmt))
 169         continue;
 170
 171       gcc_assert (gimple_debug_bind_p (stmt));
 172
 173       bbuse = gimple_bb (stmt);
 174
 175       if ((bbuse == bbphi
 176            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 177           && !(bbuse == bbdef
 178                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 179         {
 180           if (new_def)
 181             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 182               SET_USE (use_p, new_def);
 183           else
 184             {
 185               gimple_debug_bind_reset_value (stmt);
 186               update_stmt (stmt);
 187             }
 188         }
 189     }
 190 }
 191
 192 /* Adjust debug stmts as scheduled before.  */
 193
 194 static void
 195 adjust_vec_debug_stmts (void)
 196 {
 197   if (!MAY_HAVE_DEBUG_STMTS)
 198     return;
 199
 200   gcc_assert (adjust_vec.exists ());
 201
 202   while (!adjust_vec.is_empty ())
 203     {
 204       adjust_debug_stmts_now (&adjust_vec.last ());
 205       adjust_vec.pop ();
 206     }
 207 }
 208
 209 /* Adjust any debug stmts that referenced FROM values to use the
 210    loop-closed TO, if the references are dominated by BB and not by
 211    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 212    will be postponed until adjust_vec_debug_stmts is called.  */
 213
 214 static void
 215 adjust_debug_stmts (tree from, tree to, basic_block bb)
 216 {
 217   adjust_info ai;
 218
 219   if (MAY_HAVE_DEBUG_STMTS
 220       && TREE_CODE (from) == SSA_NAME
 221       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 222       && ! virtual_operand_p (from))
 223     {
 224       ai.from = from;
 225       ai.to = to;
 226       ai.bb = bb;
 227
 228       if (adjust_vec.exists ())
 229         adjust_vec.safe_push (ai);
 230       else
 231         adjust_debug_stmts_now (&ai);
 232     }
 233 }
 234
 235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 236    to adjust any debug stmts that referenced the old phi arg,
 237    presumably non-loop-closed references left over from other
 238    transformations.  */
 239
 240 static void
 241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
 242 {
 243   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 244
 245   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 246
 247   if (MAY_HAVE_DEBUG_STMTS)
 248     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 249                         gimple_bb (update_phi));
 250 }
 251
 252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 253    that starts at zero, increases by one and its limit is NITERS.
 254
 255    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 256
 257 void
 258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 259 {
 260   tree indx_before_incr, indx_after_incr;
 261   gcond *cond_stmt;
 262   gcond *orig_cond;
 263   edge exit_edge = single_exit (loop);
 264   gimple_stmt_iterator loop_cond_gsi;
 265   gimple_stmt_iterator incr_gsi;
 266   bool insert_after;
 267   tree init = build_int_cst (TREE_TYPE (niters), 0);
 268   tree step = build_int_cst (TREE_TYPE (niters), 1);
 269   source_location loop_loc;
 270   enum tree_code code;
 271
 272   orig_cond = get_loop_exit_condition (loop);
 273   gcc_assert (orig_cond);
 274   loop_cond_gsi = gsi_for_stmt (orig_cond);
 275
 276   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 277   create_iv (init, step, NULL_TREE, loop,
 278              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 279
 280   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 281                                               true, NULL_TREE, true,
 282                                               GSI_SAME_STMT);
 283   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 284                                      true, GSI_SAME_STMT);
 285
 286   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 287   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 288                                  NULL_TREE);
 289
 290   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 291
 292   /* Remove old loop exit test:  */
 293   gsi_remove (&loop_cond_gsi, true);
 294   free_stmt_vec_info (orig_cond);
 295
 296   loop_loc = find_loop_location (loop);
 297   if (dump_enabled_p ())
 298     {
 299       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 300         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 301                      LOCATION_LINE (loop_loc));
 302       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 303     }
 304
 305   /* Record the number of latch iterations.  */
 306   loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
 307                                      build_int_cst (TREE_TYPE (niters), 1));
 308 }
 309
 310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 311    For all PHI arguments in FROM->dest and TO->dest from those
 312    edges ensure that TO->dest PHI arguments have current_def
 313    to that in from.  */
 314
 315 static void
 316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 317 {
 318   gimple_stmt_iterator gsi_from, gsi_to;
 319
 320   for (gsi_from = gsi_start_phis (from->dest),
 321        gsi_to = gsi_start_phis (to->dest);
 322        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
 323     {
 324       gimple *from_phi = gsi_stmt (gsi_from);
 325       gimple *to_phi = gsi_stmt (gsi_to);
 326       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 327       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 328       if (virtual_operand_p (from_arg))
 329         {
 330           gsi_next (&gsi_from);
 331           continue;
 332         }
 333       if (virtual_operand_p (to_arg))
 334         {
 335           gsi_next (&gsi_to);
 336           continue;
 337         }
 338       if (TREE_CODE (from_arg) != SSA_NAME)
 339         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
 340       else
 341         {
 342           if (get_current_def (to_arg) == NULL_TREE)
 343             set_current_def (to_arg, get_current_def (from_arg));
 344         }
 345       gsi_next (&gsi_from);
 346       gsi_next (&gsi_to);
 347     }
 348
 349   gphi *from_phi = get_virtual_phi (from->dest);
 350   gphi *to_phi = get_virtual_phi (to->dest);
 351   if (from_phi)
 352     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
 353                      get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
 354 }
 355
 356
 357 /* Given LOOP this function generates a new copy of it and puts it
 358    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 359    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 360    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 361    entry or exit of LOOP.  */
 362
 363 struct loop *
 364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 365                                         struct loop *scalar_loop, edge e)
 366 {
 367   struct loop *new_loop;
 368   basic_block *new_bbs, *bbs, *pbbs;
 369   bool at_exit;
 370   bool was_imm_dom;
 371   basic_block exit_dest;
 372   edge exit, new_exit;
 373   bool duplicate_outer_loop = false;
 374
 375   exit = single_exit (loop);
 376   at_exit = (e == exit);
 377   if (!at_exit && e != loop_preheader_edge (loop))
 378     return NULL;
 379
 380   if (scalar_loop == NULL)
 381     scalar_loop = loop;
 382
 383   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 384   pbbs = bbs + 1;
 385   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
 386   /* Allow duplication of outer loops.  */
 387   if (scalar_loop->inner)
 388     duplicate_outer_loop = true;
 389   /* Check whether duplication is possible.  */
 390   if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
 391     {
 392       free (bbs);
 393       return NULL;
 394     }
 395
 396   /* Generate new loop structure.  */
 397   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 398   duplicate_subloops (scalar_loop, new_loop);
 399
 400   exit_dest = exit->dest;
 401   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 402                                           exit_dest) == loop->header ?
 403                  true : false);
 404
 405   /* Also copy the pre-header, this avoids jumping through hoops to
 406      duplicate the loop entry PHI arguments.  Create an empty
 407      pre-header unconditionally for this.  */
 408   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 409   edge entry_e = single_pred_edge (preheader);
 410   bbs[0] = preheader;
 411   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 412
 413   exit = single_exit (scalar_loop);
 414   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 415             &exit, 1, &new_exit, NULL,
 416             at_exit ? loop->latch : e->src, true);
 417   exit = single_exit (loop);
 418   basic_block new_preheader = new_bbs[0];
 419
 420   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 421
 422   if (scalar_loop != loop)
 423     {
 424       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 425          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 426          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 427          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 428          header) to have current_def set, so copy them over.  */
 429       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 430                                                 exit);
 431       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 432                                                            0),
 433                                                 EDGE_SUCC (loop->latch, 0));
 434     }
 435
 436   if (at_exit) /* Add the loop copy at exit.  */
 437     {
 438       if (scalar_loop != loop)
 439         {
 440           gphi_iterator gsi;
 441           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 442
 443           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 444                gsi_next (&gsi))
 445             {
 446               gphi *phi = gsi.phi ();
 447               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 448               location_t orig_locus
 449                 = gimple_phi_arg_location_from_edge (phi, e);
 450
 451               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 452             }
 453         }
 454       redirect_edge_and_branch_force (e, new_preheader);
 455       flush_pending_stmts (e);
 456       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 457       if (was_imm_dom || duplicate_outer_loop)
 458         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 459
 460       /* And remove the non-necessary forwarder again.  Keep the other
 461          one so we have a proper pre-header for the loop at the exit edge.  */
 462       redirect_edge_pred (single_succ_edge (preheader),
 463                           single_pred (preheader));
 464       delete_basic_block (preheader);
 465       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 466                                loop_preheader_edge (scalar_loop)->src);
 467     }
 468   else /* Add the copy at entry.  */
 469     {
 470       if (scalar_loop != loop)
 471         {
 472           /* Remove the non-necessary forwarder of scalar_loop again.  */
 473           redirect_edge_pred (single_succ_edge (preheader),
 474                               single_pred (preheader));
 475           delete_basic_block (preheader);
 476           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 477                                    loop_preheader_edge (scalar_loop)->src);
 478           preheader = split_edge (loop_preheader_edge (loop));
 479           entry_e = single_pred_edge (preheader);
 480         }
 481
 482       redirect_edge_and_branch_force (entry_e, new_preheader);
 483       flush_pending_stmts (entry_e);
 484       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 485
 486       redirect_edge_and_branch_force (new_exit, preheader);
 487       flush_pending_stmts (new_exit);
 488       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 489
 490       /* And remove the non-necessary forwarder again.  Keep the other
 491          one so we have a proper pre-header for the loop at the exit edge.  */
 492       redirect_edge_pred (single_succ_edge (new_preheader),
 493                           single_pred (new_preheader));
 494       delete_basic_block (new_preheader);
 495       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 496                                loop_preheader_edge (new_loop)->src);
 497     }
 498
 499   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 500     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
 501
 502   if (scalar_loop != loop)
 503     {
 504       /* Update new_loop->header PHIs, so that on the preheader
 505          edge they are the ones from loop rather than scalar_loop.  */
 506       gphi_iterator gsi_orig, gsi_new;
 507       edge orig_e = loop_preheader_edge (loop);
 508       edge new_e = loop_preheader_edge (new_loop);
 509
 510       for (gsi_orig = gsi_start_phis (loop->header),
 511            gsi_new = gsi_start_phis (new_loop->header);
 512            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 513            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 514         {
 515           gphi *orig_phi = gsi_orig.phi ();
 516           gphi *new_phi = gsi_new.phi ();
 517           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 518           location_t orig_locus
 519             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 520
 521           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 522         }
 523     }
 524
 525   free (new_bbs);
 526   free (bbs);
 527
 528   checking_verify_dominators (CDI_DOMINATORS);
 529
 530   return new_loop;
 531 }
 532
 533
 534 /* Given the condition expression COND, put it as the last statement of
 535    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
 536    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
 537    skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
 538    new edge as irreducible if IRREDUCIBLE_P is true.  */
 539
 540 static edge
 541 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 542                        basic_block guard_to, basic_block dom_bb,
 543                        profile_probability probability, bool irreducible_p)
 544 {
 545   gimple_stmt_iterator gsi;
 546   edge new_e, enter_e;
 547   gcond *cond_stmt;
 548   gimple_seq gimplify_stmt_list = NULL;
 549
 550   enter_e = EDGE_SUCC (guard_bb, 0);
 551   enter_e->flags &= ~EDGE_FALLTHRU;
 552   enter_e->flags |= EDGE_FALSE_VALUE;
 553   gsi = gsi_last_bb (guard_bb);
 554
 555   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 556                                  NULL_TREE);
 557   if (gimplify_stmt_list)
 558     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 559
 560   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 561   gsi = gsi_last_bb (guard_bb);
 562   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 563
 564   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 565   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
 566
 567   new_e->count = guard_bb->count;
 568   new_e->probability = probability;
 569   new_e->count = enter_e->count.apply_probability (probability);
 570   if (irreducible_p)
 571     new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
 572
 573   enter_e->count -= new_e->count;
 574   enter_e->probability = probability.invert ();
 575   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
 576
 577   /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
 578   if (enter_e->dest->loop_father->header == enter_e->dest)
 579     split_edge (enter_e);
 580
 581   return new_e;
 582 }
 583
 584
 585 /* This function verifies that the following restrictions apply to LOOP:
 586    (1) it consists of exactly 2 basic blocks - header, and an empty latch
 587        for innermost loop and 5 basic blocks for outer-loop.
 588    (2) it is single entry, single exit
 589    (3) its exit condition is the last stmt in the header
 590    (4) E is the entry/exit edge of LOOP.
 591  */
 592
 593 bool
 594 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 595 {
 596   edge exit_e = single_exit (loop);
 597   edge entry_e = loop_preheader_edge (loop);
 598   gcond *orig_cond = get_loop_exit_condition (loop);
 599   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 600   unsigned int num_bb = loop->inner? 5 : 2;
 601
 602   /* All loops have an outer scope; the only case loop->outer is NULL is for
 603      the function itself.  */
 604   if (!loop_outer (loop)
 605       || loop->num_nodes != num_bb
 606       || !empty_block_p (loop->latch)
 607       || !single_exit (loop)
 608       /* Verify that new loop exit condition can be trivially modified.  */
 609       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 610       || (e != exit_e && e != entry_e))
 611     return false;
 612
 613   return true;
 614 }
 615
 616 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
 617    in the exit bb and rename all the uses after the loop.  This simplifies
 618    the *guard[12] routines, which assume loop closed SSA form for all PHIs
 619    (but normally loop closed SSA form doesn't require virtual PHIs to be
 620    in the same form).  Doing this early simplifies the checking what
 621    uses should be renamed.  */
 622
 623 static void
 624 create_lcssa_for_virtual_phi (struct loop *loop)
 625 {
 626   gphi_iterator gsi;
 627   edge exit_e = single_exit (loop);
 628
 629   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
 630     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 631       {
 632         gphi *phi = gsi.phi ();
 633         for (gsi = gsi_start_phis (exit_e->dest);
 634              !gsi_end_p (gsi); gsi_next (&gsi))
 635           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 636             break;
 637         if (gsi_end_p (gsi))
 638           {
 639             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
 640             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
 641             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
 642             imm_use_iterator imm_iter;
 643             gimple *stmt;
 644             use_operand_p use_p;
 645
 646             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
 647             gimple_phi_set_result (new_phi, new_vop);
 648             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
 649               if (stmt != new_phi
 650                   && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
 651                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 652                   SET_USE (use_p, new_vop);
 653           }
 654         break;
 655       }
 656
 657 }
 658
 659 /* Function vect_get_loop_location.
 660
 661    Extract the location of the loop in the source code.
 662    If the loop is not well formed for vectorization, an estimated
 663    location is calculated.
 664    Return the loop location if succeed and NULL if not.  */
 665
 666 source_location
 667 find_loop_location (struct loop *loop)
 668 {
 669   gimple *stmt = NULL;
 670   basic_block bb;
 671   gimple_stmt_iterator si;
 672
 673   if (!loop)
 674     return UNKNOWN_LOCATION;
 675
 676   stmt = get_loop_exit_condition (loop);
 677
 678   if (stmt
 679       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 680     return gimple_location (stmt);
 681
 682   /* If we got here the loop is probably not "well formed",
 683      try to estimate the loop location */
 684
 685   if (!loop->header)
 686     return UNKNOWN_LOCATION;
 687
 688   bb = loop->header;
 689
 690   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 691     {
 692       stmt = gsi_stmt (si);
 693       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 694         return gimple_location (stmt);
 695     }
 696
 697   return UNKNOWN_LOCATION;
 698 }
 699
 700 /* Return true if PHI defines an IV of the loop to be vectorized.  */
 701
 702 static bool
 703 iv_phi_p (gphi *phi)
 704 {
 705   if (virtual_operand_p (PHI_RESULT (phi)))
 706     return false;
 707
 708   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 709   gcc_assert (stmt_info != NULL);
 710   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
 711       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 712     return false;
 713
 714   return true;
 715 }
 716
 717 /* Function vect_can_advance_ivs_p
 718
 719    In case the number of iterations that LOOP iterates is unknown at compile
 720    time, an epilog loop will be generated, and the loop induction variables
 721    (IVs) will be "advanced" to the value they are supposed to take just before
 722    the epilog loop.  Here we check that the access function of the loop IVs
 723    and the expression that represents the loop bound are simple enough.
 724    These restrictions will be relaxed in the future.  */
 725
 726 bool
 727 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 728 {
 729   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 730   basic_block bb = loop->header;
 731   gphi_iterator gsi;
 732
 733   /* Analyze phi functions of the loop header.  */
 734
 735   if (dump_enabled_p ())
 736     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
 737   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 738     {
 739       tree evolution_part;
 740
 741       gphi *phi = gsi.phi ();
 742       if (dump_enabled_p ())
 743         {
 744           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 745           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 746         }
 747
 748       /* Skip virtual phi's. The data dependences that are associated with
 749          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
 750
 751          Skip reduction phis.  */
 752       if (!iv_phi_p (phi))
 753         {
 754           if (dump_enabled_p ())
 755             dump_printf_loc (MSG_NOTE, vect_location,
 756                              "reduc or virtual phi. skip.\n");
 757           continue;
 758         }
 759
 760       /* Analyze the evolution function.  */
 761
 762       evolution_part
 763         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 764       if (evolution_part == NULL_TREE)
 765         {
 766           if (dump_enabled_p ())
 767             dump_printf (MSG_MISSED_OPTIMIZATION,
 768                          "No access function or evolution.\n");
 769           return false;
 770         }
 771
 772       /* FORNOW: We do not transform initial conditions of IVs
 773          which evolution functions are not invariants in the loop.  */
 774
 775       if (!expr_invariant_in_loop_p (loop, evolution_part))
 776         {
 777           if (dump_enabled_p ())
 778             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 779                              "evolution not invariant in loop.\n");
 780           return false;
 781         }
 782
 783       /* FORNOW: We do not transform initial conditions of IVs
 784          which evolution functions are a polynomial of degree >= 2.  */
 785
 786       if (tree_is_chrec (evolution_part))
 787         {
 788           if (dump_enabled_p ())
 789             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 790                              "evolution is chrec.\n");
 791           return false;
 792         }
 793     }
 794
 795   return true;
 796 }
 797
 798
 799 /*   Function vect_update_ivs_after_vectorizer.
 800
 801      "Advance" the induction variables of LOOP to the value they should take
 802      after the execution of LOOP.  This is currently necessary because the
 803      vectorizer does not handle induction variables that are used after the
 804      loop.  Such a situation occurs when the last iterations of LOOP are
 805      peeled, because:
 806      1. We introduced new uses after LOOP for IVs that were not originally used
 807         after LOOP: the IVs of LOOP are now used by an epilog loop.
 808      2. LOOP is going to be vectorized; this means that it will iterate N/VF
 809         times, whereas the loop IVs should be bumped N times.
 810
 811      Input:
 812      - LOOP - a loop that is going to be vectorized. The last few iterations
 813               of LOOP were peeled.
 814      - NITERS - the number of iterations that LOOP executes (before it is
 815                 vectorized). i.e, the number of times the ivs should be bumped.
 816      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
 817                   coming out from LOOP on which there are uses of the LOOP ivs
 818                   (this is the path from LOOP->exit to epilog_loop->preheader).
 819
 820                   The new definitions of the ivs are placed in LOOP->exit.
 821                   The phi args associated with the edge UPDATE_E in the bb
 822                   UPDATE_E->dest are updated accordingly.
 823
 824      Assumption 1: Like the rest of the vectorizer, this function assumes
 825      a single loop exit that has a single predecessor.
 826
 827      Assumption 2: The phi nodes in the LOOP header and in update_bb are
 828      organized in the same order.
 829
 830      Assumption 3: The access function of the ivs is simple enough (see
 831      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
 832
 833      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
 834      coming out of LOOP on which the ivs of LOOP are used (this is the path
 835      that leads to the epilog loop; other paths skip the epilog loop).  This
 836      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
 837      needs to have its phis updated.
 838  */
 839
 840 static void
 841 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
 842                                   tree niters, edge update_e)
 843 {
 844   gphi_iterator gsi, gsi1;
 845   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 846   basic_block update_bb = update_e->dest;
 847   basic_block exit_bb = single_exit (loop)->dest;
 848
 849   /* Make sure there exists a single-predecessor exit bb:  */
 850   gcc_assert (single_pred_p (exit_bb));
 851   gcc_assert (single_succ_edge (exit_bb) == update_e);
 852
 853   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
 854        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
 855        gsi_next (&gsi), gsi_next (&gsi1))
 856     {
 857       tree init_expr;
 858       tree step_expr, off;
 859       tree type;
 860       tree var, ni, ni_name;
 861       gimple_stmt_iterator last_gsi;
 862
 863       gphi *phi = gsi.phi ();
 864       gphi *phi1 = gsi1.phi ();
 865       if (dump_enabled_p ())
 866         {
 867           dump_printf_loc (MSG_NOTE, vect_location,
 868                            "vect_update_ivs_after_vectorizer: phi: ");
 869           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 870         }
 871
 872       /* Skip reduction and virtual phis.  */
 873       if (!iv_phi_p (phi))
 874         {
 875           if (dump_enabled_p ())
 876             dump_printf_loc (MSG_NOTE, vect_location,
 877                              "reduc or virtual phi. skip.\n");
 878           continue;
 879         }
 880
 881       type = TREE_TYPE (gimple_phi_result (phi));
 882       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 883       step_expr = unshare_expr (step_expr);
 884
 885       /* FORNOW: We do not support IVs whose evolution function is a polynomial
 886          of degree >= 2 or exponential.  */
 887       gcc_assert (!tree_is_chrec (step_expr));
 888
 889       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 890
 891       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 892                          fold_convert (TREE_TYPE (step_expr), niters),
 893                          step_expr);
 894       if (POINTER_TYPE_P (type))
 895         ni = fold_build_pointer_plus (init_expr, off);
 896       else
 897         ni = fold_build2 (PLUS_EXPR, type,
 898                           init_expr, fold_convert (type, off));
 899
 900       var = create_tmp_var (type, "tmp");
 901
 902       last_gsi = gsi_last_bb (exit_bb);
 903       gimple_seq new_stmts = NULL;
 904       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
 905       /* Exit_bb shouldn't be empty.  */
 906       if (!gsi_end_p (last_gsi))
 907         gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
 908       else
 909         gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
 910
 911       /* Fix phi expressions in the successor bb.  */
 912       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
 913     }
 914 }
 915
 916 /* Function vect_gen_prolog_loop_niters
 917
 918    Generate the number of iterations which should be peeled as prolog for the
 919    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
 920    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
 921    As a result, after the execution of this loop, the data reference DR will
 922    refer to an aligned location.  The following computation is generated:
 923
 924    If the misalignment of DR is known at compile time:
 925      addr_mis = int mis = DR_MISALIGNMENT (dr);
 926    Else, compute address misalignment in bytes:
 927      addr_mis = addr & (vectype_align - 1)
 928
 929    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
 930
 931    (elem_size = element type size; an element is the scalar element whose type
 932    is the inner type of the vectype)
 933
 934    The computations will be emitted at the end of BB.  We also compute and
 935    store upper bound (included) of the result in BOUND.
 936
 937    When the step of the data-ref in the loop is not 1 (as in interleaved data
 938    and SLP), the number of iterations of the prolog must be divided by the step
 939    (which is equal to the size of interleaved group).
 940
 941    The above formulas assume that VF == number of elements in the vector. This
 942    may not hold when there are multiple-types in the loop.
 943    In this case, for some data-references in the loop the VF does not represent
 944    the number of elements that fit in the vector.  Therefore, instead of VF we
 945    use TYPE_VECTOR_SUBPARTS.  */
 946
 947 static tree
 948 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
 949                              basic_block bb, int *bound)
 950 {
 951   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
 952   tree var;
 953   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
 954   gimple_seq stmts = NULL, new_stmts = NULL;
 955   tree iters, iters_name;
 956   gimple *dr_stmt = DR_STMT (dr);
 957   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
 958   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 959   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
 960   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
 961
 962   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
 963     {
 964       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 965
 966       if (dump_enabled_p ())
 967         dump_printf_loc (MSG_NOTE, vect_location,
 968                          "known peeling = %d.\n", npeel);
 969
 970       iters = build_int_cst (niters_type, npeel);
 971       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 972     }
 973   else
 974     {
 975       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
 976       tree offset = negative
 977           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
 978       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
 979                                                               &stmts, offset);
 980       tree type = unsigned_type_for (TREE_TYPE (start_addr));
 981       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
 982       HOST_WIDE_INT elem_size =
 983                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
 984       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
 985       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
 986       tree nelements_tree = build_int_cst (type, nelements);
 987       tree byte_misalign;
 988       tree elem_misalign;
 989
 990       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
 991       byte_misalign =
 992         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
 993                      vectype_align_minus_1);
 994
 995       /* Create:  elem_misalign = byte_misalign / element_size  */
 996       elem_misalign =
 997         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
 998
 999       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1000       if (negative)
1001         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1002       else
1003         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1004       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1005       iters = fold_convert (niters_type, iters);
1006       *bound = nelements - 1;
1007     }
1008
1009   if (dump_enabled_p ())
1010     {
1011       dump_printf_loc (MSG_NOTE, vect_location,
1012                        "niters for prolog loop: ");
1013       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1014       dump_printf (MSG_NOTE, "\n");
1015     }
1016
1017   var = create_tmp_var (niters_type, "prolog_loop_niters");
1018   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1019
1020   if (new_stmts)
1021     gimple_seq_add_seq (&stmts, new_stmts);
1022   if (stmts)
1023     {
1024       gcc_assert (single_succ_p (bb));
1025       gimple_stmt_iterator gsi = gsi_last_bb (bb);
1026       if (gsi_end_p (gsi))
1027         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1028       else
1029         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1030     }
1031   return iters_name;
1032 }
1033
1034
1035 /* Function vect_update_init_of_dr
1036
1037    NITERS iterations were peeled from LOOP.  DR represents a data reference
1038    in LOOP.  This function updates the information recorded in DR to
1039    account for the fact that the first NITERS iterations had already been
1040    executed.  Specifically, it updates the OFFSET field of DR.  */
1041
1042 static void
1043 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1044 {
1045   tree offset = DR_OFFSET (dr);
1046
1047   niters = fold_build2 (MULT_EXPR, sizetype,
1048                         fold_convert (sizetype, niters),
1049                         fold_convert (sizetype, DR_STEP (dr)));
1050   offset = fold_build2 (PLUS_EXPR, sizetype,
1051                         fold_convert (sizetype, offset), niters);
1052   DR_OFFSET (dr) = offset;
1053 }
1054
1055
1056 /* Function vect_update_inits_of_drs
1057
1058    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1059    This function updates the information recorded for the data references in
1060    the loop to account for the fact that the first NITERS iterations had
1061    already been executed.  Specifically, it updates the initial_condition of
1062    the access_function of all the data_references in the loop.  */
1063
1064 static void
1065 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1066 {
1067   unsigned int i;
1068   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1069   struct data_reference *dr;
1070
1071   if (dump_enabled_p ())
1072     dump_printf_loc (MSG_NOTE, vect_location,
1073                      "=== vect_update_inits_of_dr ===\n");
1074
1075   /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
1076   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1077     {
1078       gimple_seq seq;
1079       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1080       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1081
1082       niters = fold_convert (sizetype, niters);
1083       niters = force_gimple_operand (niters, &seq, false, var);
1084       if (seq)
1085         {
1086           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1087           gcc_assert (!new_bb);
1088         }
1089     }
1090
1091   FOR_EACH_VEC_ELT (datarefs, i, dr)
1092     vect_update_init_of_dr (dr, niters);
1093 }
1094
1095
1096 /* This function builds ni_name = number of iterations.  Statements
1097    are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL, set
1098    it to TRUE if new ssa_var is generated.  */
1099
1100 tree
1101 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
1102 {
1103   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1104   if (TREE_CODE (ni) == INTEGER_CST)
1105     return ni;
1106   else
1107     {
1108       tree ni_name, var;
1109       gimple_seq stmts = NULL;
1110       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1111
1112       var = create_tmp_var (TREE_TYPE (ni), "niters");
1113       ni_name = force_gimple_operand (ni, &stmts, false, var);
1114       if (stmts)
1115         {
1116           gsi_insert_seq_on_edge_immediate (pe, stmts);
1117           if (new_var_p != NULL)
1118             *new_var_p = true;
1119         }
1120
1121       return ni_name;
1122     }
1123 }
1124
1125 /* Calculate the number of iterations above which vectorized loop will be
1126    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
1127    of prolog loop.  If it's integer const, the integer number is also passed
1128    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
1129    number of iterations of prolog loop.  VFM1 is vector factor minus one.
1130    If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1131    (rather than vectorized) loop will be executed.  This function stores
1132    upper bound (included) of the result in BOUND_SCALAR.  */
1133
1134 static tree
1135 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1136                              int bound_prolog, int vfm1, int th,
1137                              int *bound_scalar, bool check_profitability)
1138 {
1139   tree type = TREE_TYPE (niters_prolog);
1140   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1141                              build_int_cst (type, vfm1));
1142
1143   *bound_scalar = vfm1 + bound_prolog;
1144   if (check_profitability)
1145     {
1146       /* TH indicates the minimum niters of vectorized loop, while we
1147          compute the maximum niters of scalar loop.  */
1148       th--;
1149       /* Peeling for constant times.  */
1150       if (int_niters_prolog >= 0)
1151         {
1152           *bound_scalar = (int_niters_prolog + vfm1 < th
1153                             ? th
1154                             : vfm1 + int_niters_prolog);
1155           return build_int_cst (type, *bound_scalar);
1156         }
1157       /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
1158          bound (inlcuded) of niters of prolog loop.  */
1159       if (th >=  vfm1 + bound_prolog)
1160         {
1161           *bound_scalar = th;
1162           return build_int_cst (type, th);
1163         }
1164       /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
1165       else if (th > vfm1)
1166         return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1167     }
1168   return niters;
1169 }
1170
1171 /* This function generates the following statements:
1172
1173    niters = number of iterations loop executes (after peeling)
1174    niters_vector = niters / vf
1175
1176    and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
1177    true if NITERS doesn't overflow.  */
1178
1179 void
1180 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1181                              tree *niters_vector_ptr, bool niters_no_overflow)
1182 {
1183   tree ni_minus_gap, var;
1184   tree niters_vector, type = TREE_TYPE (niters);
1185   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1186   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1187   tree log_vf = build_int_cst (type, exact_log2 (vf));
1188
1189   /* If epilogue loop is required because of data accesses with gaps, we
1190      subtract one iteration from the total number of iterations here for
1191      correct calculation of RATIO.  */
1192   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1193     {
1194       ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1195                                   build_one_cst (type));
1196       if (!is_gimple_val (ni_minus_gap))
1197         {
1198           var = create_tmp_var (type, "ni_gap");
1199           gimple *stmts = NULL;
1200           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1201                                                true, var);
1202           gsi_insert_seq_on_edge_immediate (pe, stmts);
1203         }
1204     }
1205   else
1206     ni_minus_gap = niters;
1207
1208   /* Create: niters >> log2(vf) */
1209   /* If it's known that niters == number of latch executions + 1 doesn't
1210      overflow, we can generate niters >> log2(vf); otherwise we generate
1211      (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1212      will be at least one.  */
1213   if (niters_no_overflow)
1214     niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1215   else
1216     niters_vector
1217       = fold_build2 (PLUS_EXPR, type,
1218                      fold_build2 (RSHIFT_EXPR, type,
1219                                   fold_build2 (MINUS_EXPR, type, ni_minus_gap,
1220                                                build_int_cst (type, vf)),
1221                                   log_vf),
1222                      build_int_cst (type, 1));
1223
1224   if (!is_gimple_val (niters_vector))
1225     {
1226       var = create_tmp_var (type, "bnd");
1227       gimple_seq stmts = NULL;
1228       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1229       gsi_insert_seq_on_edge_immediate (pe, stmts);
1230       /* Peeling algorithm guarantees that vector loop bound is at least ONE,
1231          we set range information to make niters analyzer's life easier.  */
1232       if (stmts != NULL)
1233         set_range_info (niters_vector, VR_RANGE, build_int_cst (type, 1),
1234                         fold_build2 (RSHIFT_EXPR, type,
1235                                      TYPE_MAX_VALUE (type), log_vf));
1236     }
1237   *niters_vector_ptr = niters_vector;
1238
1239   return;
1240 }
1241
1242 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1243    loop specified by LOOP_VINFO after vectorization, compute the number
1244    of iterations before vectorization (niters_vector * vf) and store it
1245    to NITERS_VECTOR_MULT_VF_PTR.  */
1246
1247 static void
1248 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1249                                      tree niters_vector,
1250                                      tree *niters_vector_mult_vf_ptr)
1251 {
1252   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1253   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1254   tree type = TREE_TYPE (niters_vector);
1255   tree log_vf = build_int_cst (type, exact_log2 (vf));
1256   basic_block exit_bb = single_exit (loop)->dest;
1257
1258   gcc_assert (niters_vector_mult_vf_ptr != NULL);
1259   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1260                                             niters_vector, log_vf);
1261   if (!is_gimple_val (niters_vector_mult_vf))
1262     {
1263       tree var = create_tmp_var (type, "niters_vector_mult_vf");
1264       gimple_seq stmts = NULL;
1265       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1266                                                     &stmts, true, var);
1267       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1268       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1269     }
1270   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1271 }
1272
1273 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1274    from SECOND/FIRST and puts it at the original loop's preheader/exit
1275    edge, the two loops are arranged as below:
1276
1277        preheader_a:
1278      first_loop:
1279        header_a:
1280          i_1 = PHI<i_0, i_2>;
1281          ...
1282          i_2 = i_1 + 1;
1283          if (cond_a)
1284            goto latch_a;
1285          else
1286            goto between_bb;
1287        latch_a:
1288          goto header_a;
1289
1290        between_bb:
1291          ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,
1292
1293      second_loop:
1294        header_b:
1295          i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1296                                  or with i_2 if no LCSSA phi is created
1297                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
1298          ...
1299          i_4 = i_3 + 1;
1300          if (cond_b)
1301            goto latch_b;
1302          else
1303            goto exit_bb;
1304        latch_b:
1305          goto header_b;
1306
1307        exit_bb:
1308
1309    This function creates loop closed SSA for the first loop; update the
1310    second loop's PHI nodes by replacing argument on incoming edge with the
1311    result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
1312    is false, Loop closed ssa phis will only be created for non-iv phis for
1313    the first loop.
1314
1315    This function assumes exit bb of the first loop is preheader bb of the
1316    second loop, i.e, between_bb in the example code.  With PHIs updated,
1317    the second loop will execute rest iterations of the first.  */
1318
1319 static void
1320 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1321                                    struct loop *first, struct loop *second,
1322                                    bool create_lcssa_for_iv_phis)
1323 {
1324   gphi_iterator gsi_update, gsi_orig;
1325   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1326
1327   edge first_latch_e = EDGE_SUCC (first->latch, 0);
1328   edge second_preheader_e = loop_preheader_edge (second);
1329   basic_block between_bb = single_exit (first)->dest;
1330
1331   gcc_assert (between_bb == second_preheader_e->src);
1332   gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1333   /* Either the first loop or the second is the loop to be vectorized.  */
1334   gcc_assert (loop == first || loop == second);
1335
1336   for (gsi_orig = gsi_start_phis (first->header),
1337        gsi_update = gsi_start_phis (second->header);
1338        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1339        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1340     {
1341       gphi *orig_phi = gsi_orig.phi ();
1342       gphi *update_phi = gsi_update.phi ();
1343
1344       tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1345       /* Generate lcssa PHI node for the first loop.  */
1346       gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1347       if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1348         {
1349           tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1350           gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1351           add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1352           arg = new_res;
1353         }
1354
1355       /* Update PHI node in the second loop by replacing arg on the loop's
1356          incoming edge.  */
1357       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1358     }
1359 }
1360
1361 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1362    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
1363    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
1364    appear like below:
1365
1366        guard_bb:
1367          if (cond)
1368            goto merge_bb;
1369          else
1370            goto skip_loop;
1371
1372      skip_loop:
1373        header_a:
1374          i_1 = PHI<i_0, i_2>;
1375          ...
1376          i_2 = i_1 + 1;
1377          if (cond_a)
1378            goto latch_a;
1379          else
1380            goto exit_a;
1381        latch_a:
1382          goto header_a;
1383
1384        exit_a:
1385          i_5 = PHI<i_2>;
1386
1387        merge_bb:
1388          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1389
1390      update_loop:
1391        header_b:
1392          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
1393          ...
1394          i_4 = i_3 + 1;
1395          if (cond_b)
1396            goto latch_b;
1397          else
1398            goto exit_bb;
1399        latch_b:
1400          goto header_b;
1401
1402        exit_bb:
1403
1404    This function creates PHI nodes at merge_bb and replaces the use of i_5
1405    in the update_loop's PHI node with the result of new PHI result.  */
1406
1407 static void
1408 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1409                                     struct loop *update_loop,
1410                                     edge guard_edge, edge merge_edge)
1411 {
1412   source_location merge_loc, guard_loc;
1413   edge orig_e = loop_preheader_edge (skip_loop);
1414   edge update_e = loop_preheader_edge (update_loop);
1415   gphi_iterator gsi_orig, gsi_update;
1416
1417   for ((gsi_orig = gsi_start_phis (skip_loop->header),
1418         gsi_update = gsi_start_phis (update_loop->header));
1419        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1420        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1421     {
1422       gphi *orig_phi = gsi_orig.phi ();
1423       gphi *update_phi = gsi_update.phi ();
1424
1425       /* Generate new phi node at merge bb of the guard.  */
1426       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1427       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1428
1429       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
1430          args in NEW_PHI for these edges.  */
1431       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1432       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1433       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1434       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1435       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1436       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1437
1438       /* Update phi in UPDATE_PHI.  */
1439       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1440     }
1441 }
1442
1443 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1444    this function searches for the corresponding lcssa phi node in exit
1445    bb of LOOP.  If it is found, return the phi result; otherwise return
1446    NULL.  */
1447
1448 static tree
1449 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1450                 gphi *lcssa_phi)
1451 {
1452   gphi_iterator gsi;
1453   edge e = single_exit (loop);
1454
1455   gcc_assert (single_pred_p (e->dest));
1456   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1457     {
1458       gphi *phi = gsi.phi ();
1459       if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1460                            PHI_ARG_DEF (lcssa_phi, 0), 0))
1461         return PHI_RESULT (phi);
1462     }
1463   return NULL_TREE;
1464 }
1465
1466 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1467    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
1468    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
1469    and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1470    The CFG looks like:
1471
1472      loop:
1473        header_a:
1474          i_1 = PHI<i_0, i_2>;
1475          ...
1476          i_2 = i_1 + 1;
1477          if (cond_a)
1478            goto latch_a;
1479          else
1480            goto exit_a;
1481        latch_a:
1482          goto header_a;
1483
1484        exit_a:
1485
1486        guard_bb:
1487          if (cond)
1488            goto merge_bb;
1489          else
1490            goto epilog_loop;
1491
1492        ;; fall_through_bb
1493
1494      epilog_loop:
1495        header_b:
1496          i_3 = PHI<i_2, i_4>;
1497          ...
1498          i_4 = i_3 + 1;
1499          if (cond_b)
1500            goto latch_b;
1501          else
1502            goto merge_bb;
1503        latch_b:
1504          goto header_b;
1505
1506        merge_bb:
1507          ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1508
1509        exit_bb:
1510          i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.
1511
1512    For each name used out side EPILOG (i.e - for each name that has a lcssa
1513    phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
1514    args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
1515    the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1516    by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
1517    in exit_bb will also be updated.  */
1518
1519 static void
1520 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1521                                     edge guard_edge, edge merge_edge)
1522 {
1523   gphi_iterator gsi;
1524   basic_block merge_bb = guard_edge->dest;
1525
1526   gcc_assert (single_succ_p (merge_bb));
1527   edge e = single_succ_edge (merge_bb);
1528   basic_block exit_bb = e->dest;
1529   gcc_assert (single_pred_p (exit_bb));
1530   gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1531
1532   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1533     {
1534       gphi *update_phi = gsi.phi ();
1535       tree old_arg = PHI_ARG_DEF (update_phi, 0);
1536       /* This loop-closed-phi actually doesn't represent a use out of the
1537          loop - the phi arg is a constant.  */
1538       if (TREE_CODE (old_arg) != SSA_NAME)
1539         continue;
1540
1541       tree merge_arg = get_current_def (old_arg);
1542       if (!merge_arg)
1543         merge_arg = old_arg;
1544
1545       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1546       /* If the var is live after loop but not a reduction, we simply
1547          use the old arg.  */
1548       if (!guard_arg)
1549         guard_arg = old_arg;
1550
1551       /* Create new phi node in MERGE_BB:  */
1552       tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1553       gphi *merge_phi = create_phi_node (new_res, merge_bb);
1554
1555       /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1556          the two PHI args in merge_phi for these edges.  */
1557       add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1558       add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1559
1560       /* Update the original phi in exit_bb.  */
1561       adjust_phi_and_debug_stmts (update_phi, e, new_res);
1562     }
1563 }
1564
1565 /* EPILOG loop is duplicated from the original loop for vectorizing,
1566    the arg of its loop closed ssa PHI needs to be updated.  */
1567
1568 static void
1569 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1570 {
1571   gphi_iterator gsi;
1572   basic_block exit_bb = single_exit (epilog)->dest;
1573
1574   gcc_assert (single_pred_p (exit_bb));
1575   edge e = EDGE_PRED (exit_bb, 0);
1576   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1577     rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1578 }
1579
1580 /* Function vect_do_peeling.
1581
1582    Input:
1583    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1584
1585        preheader:
1586      LOOP:
1587        header_bb:
1588          loop_body
1589          if (exit_loop_cond) goto exit_bb
1590          else                goto header_bb
1591        exit_bb:
1592
1593    - NITERS: The number of iterations of the loop.
1594    - NITERSM1: The number of iterations of the loop's latch.
1595    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1596    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1597                               CHECK_PROFITABILITY is true.
1598    Output:
1599    - NITERS_VECTOR: The number of iterations of loop after vectorization.
1600
1601    This function peels prolog and epilog from the loop, adds guards skipping
1602    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
1603    would look like:
1604
1605        guard_bb_1:
1606          if (prefer_scalar_loop) goto merge_bb_1
1607          else                    goto guard_bb_2
1608
1609        guard_bb_2:
1610          if (skip_prolog) goto merge_bb_2
1611          else             goto prolog_preheader
1612
1613        prolog_preheader:
1614      PROLOG:
1615        prolog_header_bb:
1616          prolog_body
1617          if (exit_prolog_cond) goto prolog_exit_bb
1618          else                  goto prolog_header_bb
1619        prolog_exit_bb:
1620
1621        merge_bb_2:
1622
1623        vector_preheader:
1624      VECTOR LOOP:
1625        vector_header_bb:
1626          vector_body
1627          if (exit_vector_cond) goto vector_exit_bb
1628          else                  goto vector_header_bb
1629        vector_exit_bb:
1630
1631        guard_bb_3:
1632          if (skip_epilog) goto merge_bb_3
1633          else             goto epilog_preheader
1634
1635        merge_bb_1:
1636
1637        epilog_preheader:
1638      EPILOG:
1639        epilog_header_bb:
1640          epilog_body
1641          if (exit_epilog_cond) goto merge_bb_3
1642          else                  goto epilog_header_bb
1643
1644        merge_bb_3:
1645
1646    Note this function peels prolog and epilog only if it's necessary,
1647    as well as guards.
1648    Returns created epilogue or NULL.
1649
1650    TODO: Guard for prefer_scalar_loop should be emitted along with
1651    versioning conditions if loop versioning is needed.  */
1652
1653
1654 struct loop *
1655 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1656                  tree *niters_vector, int th, bool check_profitability,
1657                  bool niters_no_overflow)
1658 {
1659   edge e, guard_e;
1660   tree type = TREE_TYPE (niters), guard_cond;
1661   basic_block guard_bb, guard_to;
1662   profile_probability prob_prolog, prob_vector, prob_epilog;
1663   int bound_prolog = 0, bound_scalar = 0, bound = 0;
1664   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1665   int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1666   bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1667                          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1668
1669   if (!prolog_peeling && !epilog_peeling)
1670     return NULL;
1671
1672   prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
1673   if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1674     vf = 3;
1675   prob_prolog = prob_epilog = profile_probability::guessed_always ()
1676                         .apply_scale (vf - 1, vf);
1677   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1678
1679   struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1680   struct loop *first_loop = loop;
1681   bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1682   create_lcssa_for_virtual_phi (loop);
1683   update_ssa (TODO_update_ssa_only_virtuals);
1684
1685   if (MAY_HAVE_DEBUG_STMTS)
1686     {
1687       gcc_assert (!adjust_vec.exists ());
1688       adjust_vec.create (32);
1689     }
1690   initialize_original_copy_tables ();
1691
1692   /* Prolog loop may be skipped.  */
1693   bool skip_prolog = (prolog_peeling != 0);
1694   /* Skip to epilog if scalar loop may be preferred.  It's only needed
1695      when we peel for epilog loop and when it hasn't been checked with
1696      loop versioning.  */
1697   bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1698                       && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
1699   /* Epilog loop must be executed if the number of iterations for epilog
1700      loop is known at compile time, otherwise we need to add a check at
1701      the end of vector loop and skip to the end of epilog loop.  */
1702   bool skip_epilog = (prolog_peeling < 0
1703                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1704   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
1705   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1706     skip_epilog = false;
1707
1708   /* Record the anchor bb at which guard should be placed if scalar loop
1709      may be preferred.  */
1710   basic_block anchor = loop_preheader_edge (loop)->src;
1711   if (skip_vector)
1712     {
1713       split_edge (loop_preheader_edge (loop));
1714
1715       /* Due to the order in which we peel prolog and epilog, we first
1716          propagate probability to the whole loop.  The purpose is to
1717          avoid adjusting probabilities of both prolog and vector loops
1718          separately.  Note in this case, the probability of epilog loop
1719          needs to be scaled back later.  */
1720       basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1721       if (prob_vector.initialized_p ())
1722       scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1723       scale_loop_profile (loop, prob_vector, bound);
1724     }
1725
1726   tree niters_prolog = build_int_cst (type, 0);
1727   source_location loop_loc = find_loop_location (loop);
1728   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1729   if (prolog_peeling)
1730     {
1731       e = loop_preheader_edge (loop);
1732       if (!slpeel_can_duplicate_loop_p (loop, e))
1733         {
1734           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1735                            "loop can't be duplicated to preheader edge.\n");
1736           gcc_unreachable ();
1737         }
1738       /* Peel prolog and put it on preheader edge of loop.  */
1739       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1740       if (!prolog)
1741         {
1742           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1743                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1744           gcc_unreachable ();
1745         }
1746       slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1747       first_loop = prolog;
1748       reset_original_copy_tables ();
1749
1750       /* Generate and update the number of iterations for prolog loop.  */
1751       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1752                                                    &bound_prolog);
1753       slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1754
1755       /* Skip the prolog loop.  */
1756       if (skip_prolog)
1757         {
1758           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1759                                     niters_prolog, build_int_cst (type, 0));
1760           guard_bb = loop_preheader_edge (prolog)->src;
1761           basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1762           guard_to = split_edge (loop_preheader_edge (loop));
1763           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1764                                            guard_to, guard_bb,
1765                                            prob_prolog.invert (),
1766                                            irred_flag);
1767           e = EDGE_PRED (guard_to, 0);
1768           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1769           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1770
1771           scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1772           scale_loop_profile (prolog, prob_prolog, bound_prolog);
1773         }
1774       /* Update init address of DRs.  */
1775       vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1776       /* Update niters for vector loop.  */
1777       LOOP_VINFO_NITERS (loop_vinfo)
1778         = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1779       LOOP_VINFO_NITERSM1 (loop_vinfo)
1780         = fold_build2 (MINUS_EXPR, type,
1781                        LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1782       bool new_var_p = false;
1783       niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1784       /* It's guaranteed that vector loop bound before vectorization is at
1785          least VF, so set range information for newly generated var.  */
1786       if (new_var_p)
1787         set_range_info (niters, VR_RANGE,
1788                         build_int_cst (type, vf), TYPE_MAX_VALUE (type));
1789
1790       /* Prolog iterates at most bound_prolog times, latch iterates at
1791          most bound_prolog - 1 times.  */
1792       record_niter_bound (prolog, bound_prolog - 1, false, true);
1793       delete_update_ssa ();
1794       adjust_vec_debug_stmts ();
1795       scev_reset ();
1796     }
1797
1798   if (epilog_peeling)
1799     {
1800       e = single_exit (loop);
1801       if (!slpeel_can_duplicate_loop_p (loop, e))
1802         {
1803           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1804                            "loop can't be duplicated to exit edge.\n");
1805           gcc_unreachable ();
1806         }
1807       /* Peel epilog and put it on exit edge of loop.  */
1808       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1809       if (!epilog)
1810         {
1811           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1812                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1813           gcc_unreachable ();
1814         }
1815       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1816
1817       /* Scalar version loop may be preferred.  In this case, add guard
1818          and skip to epilog.  Note this only happens when the number of
1819          iterations of loop is unknown at compile time, otherwise this
1820          won't be vectorized.  */
1821       if (skip_vector)
1822         {
1823           /* Additional epilogue iteration is peeled if gap exists.  */
1824           bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1825           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1826                                                 bound_prolog,
1827                                                 peel_for_gaps ? vf : vf - 1,
1828                                                 th, &bound_scalar,
1829                                                 check_profitability);
1830           /* Build guard against NITERSM1 since NITERS may overflow.  */
1831           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1832           guard_bb = anchor;
1833           guard_to = split_edge (loop_preheader_edge (epilog));
1834           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1835                                            guard_to, guard_bb,
1836                                            prob_vector.invert (),
1837                                            irred_flag);
1838           e = EDGE_PRED (guard_to, 0);
1839           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1840           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1841
1842           /* Simply propagate profile info from guard_bb to guard_to which is
1843              a merge point of control flow.  */
1844           guard_to->frequency = guard_bb->frequency;
1845           guard_to->count = guard_bb->count;
1846           single_succ_edge (guard_to)->count = guard_to->count;
1847           /* Scale probability of epilog loop back.
1848              FIXME: We should avoid scaling down and back up.  Profile may
1849              get lost if we scale down to 0.  */
1850           int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
1851                          / prob_vector.to_reg_br_prob_base ();
1852           basic_block *bbs = get_loop_body (epilog);
1853           scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
1854                                      REG_BR_PROB_BASE);
1855           free (bbs);
1856         }
1857
1858       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1859       tree niters_vector_mult_vf;
1860       /* If loop is peeled for non-zero constant times, now niters refers to
1861          orig_niters - prolog_peeling, it won't overflow even the orig_niters
1862          overflows.  */
1863       niters_no_overflow |= (prolog_peeling > 0);
1864       vect_gen_vector_loop_niters (loop_vinfo, niters,
1865                                    niters_vector, niters_no_overflow);
1866       vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1867                                            &niters_vector_mult_vf);
1868       /* Update IVs of original loop as if they were advanced by
1869          niters_vector_mult_vf steps.  */
1870       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1871       edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1872       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1873                                         update_e);
1874
1875       if (skip_epilog)
1876         {
1877           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1878                                     niters, niters_vector_mult_vf);
1879           guard_bb = single_exit (loop)->dest;
1880           guard_to = split_edge (single_exit (epilog));
1881           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1882                                            skip_vector ? anchor : guard_bb,
1883                                            prob_epilog.invert (),
1884                                            irred_flag);
1885           slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1886                                               single_exit (epilog));
1887           /* Only need to handle basic block before epilog loop if it's not
1888              the guard_bb, which is the case when skip_vector is true.  */
1889           if (guard_bb != bb_before_epilog)
1890             {
1891               prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
1892
1893               scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
1894             }
1895           scale_loop_profile (epilog, prob_epilog, bound);
1896         }
1897       else
1898         slpeel_update_phi_nodes_for_lcssa (epilog);
1899
1900       bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1901       /* We share epilog loop with scalar version loop.  */
1902       bound = MAX (bound, bound_scalar - 1);
1903       record_niter_bound (epilog, bound, false, true);
1904
1905       delete_update_ssa ();
1906       adjust_vec_debug_stmts ();
1907       scev_reset ();
1908     }
1909   adjust_vec.release ();
1910   free_original_copy_tables ();
1911
1912   return epilog;
1913 }
1914
1915 /* Function vect_create_cond_for_niters_checks.
1916
1917    Create a conditional expression that represents the run-time checks for
1918    loop's niter.  The loop is guaranteed to to terminate if the run-time
1919    checks hold.
1920
1921    Input:
1922    COND_EXPR  - input conditional expression.  New conditions will be chained
1923                 with logical AND operation.  If it is NULL, then the function
1924                 is used to return the number of alias checks.
1925    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1926                 to be checked.
1927
1928    Output:
1929    COND_EXPR - conditional expression.
1930
1931    The returned COND_EXPR is the conditional expression to be used in the
1932    if statement that controls which version of the loop gets executed at
1933    runtime.  */
1934
1935 static void
1936 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1937 {
1938   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1939
1940   if (*cond_expr)
1941     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1942                               *cond_expr, part_cond_expr);
1943   else
1944     *cond_expr = part_cond_expr;
1945 }
1946
1947 /* Function vect_create_cond_for_align_checks.
1948
1949    Create a conditional expression that represents the alignment checks for
1950    all of data references (array element references) whose alignment must be
1951    checked at runtime.
1952
1953    Input:
1954    COND_EXPR  - input conditional expression.  New conditions will be chained
1955                 with logical AND operation.
1956    LOOP_VINFO - two fields of the loop information are used.
1957                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1958                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1959
1960    Output:
1961    COND_EXPR_STMT_LIST - statements needed to construct the conditional
1962                          expression.
1963    The returned value is the conditional expression to be used in the if
1964    statement that controls which version of the loop gets executed at runtime.
1965
1966    The algorithm makes two assumptions:
1967      1) The number of bytes "n" in a vector is a power of 2.
1968      2) An address "a" is aligned if a%n is zero and that this
1969         test can be done as a&(n-1) == 0.  For example, for 16
1970         byte vectors the test is a&0xf == 0.  */
1971
1972 static void
1973 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1974                                    tree *cond_expr,
1975                                    gimple_seq *cond_expr_stmt_list)
1976 {
1977   vec<gimple *> may_misalign_stmts
1978     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1979   gimple *ref_stmt;
1980   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1981   tree mask_cst;
1982   unsigned int i;
1983   tree int_ptrsize_type;
1984   char tmp_name[20];
1985   tree or_tmp_name = NULL_TREE;
1986   tree and_tmp_name;
1987   gimple *and_stmt;
1988   tree ptrsize_zero;
1989   tree part_cond_expr;
1990
1991   /* Check that mask is one less than a power of 2, i.e., mask is
1992      all zeros followed by all ones.  */
1993   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1994
1995   int_ptrsize_type = signed_type_for (ptr_type_node);
1996
1997   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
1998      of the first vector of the i'th data reference. */
1999
2000   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2001     {
2002       gimple_seq new_stmt_list = NULL;
2003       tree addr_base;
2004       tree addr_tmp_name;
2005       tree new_or_tmp_name;
2006       gimple *addr_stmt, *or_stmt;
2007       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2008       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2009       bool negative = tree_int_cst_compare
2010         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2011       tree offset = negative
2012         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2013
2014       /* create: addr_tmp = (int)(address_of_first_vector) */
2015       addr_base =
2016         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2017                                               offset);
2018       if (new_stmt_list != NULL)
2019         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2020
2021       sprintf (tmp_name, "addr2int%d", i);
2022       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2023       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2024       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2025
2026       /* The addresses are OR together.  */
2027
2028       if (or_tmp_name != NULL_TREE)
2029         {
2030           /* create: or_tmp = or_tmp | addr_tmp */
2031           sprintf (tmp_name, "orptrs%d", i);
2032           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2033           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2034                                          or_tmp_name, addr_tmp_name);
2035           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2036           or_tmp_name = new_or_tmp_name;
2037         }
2038       else
2039         or_tmp_name = addr_tmp_name;
2040
2041     } /* end for i */
2042
2043   mask_cst = build_int_cst (int_ptrsize_type, mask);
2044
2045   /* create: and_tmp = or_tmp & mask  */
2046   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2047
2048   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2049                                   or_tmp_name, mask_cst);
2050   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2051
2052   /* Make and_tmp the left operand of the conditional test against zero.
2053      if and_tmp has a nonzero bit then some address is unaligned.  */
2054   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2055   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2056                                 and_tmp_name, ptrsize_zero);
2057   if (*cond_expr)
2058     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2059                               *cond_expr, part_cond_expr);
2060   else
2061     *cond_expr = part_cond_expr;
2062 }
2063
2064 /* Function vect_create_cond_for_alias_checks.
2065
2066    Create a conditional expression that represents the run-time checks for
2067    overlapping of address ranges represented by a list of data references
2068    relations passed as input.
2069
2070    Input:
2071    COND_EXPR  - input conditional expression.  New conditions will be chained
2072                 with logical AND operation.  If it is NULL, then the function
2073                 is used to return the number of alias checks.
2074    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2075                 to be checked.
2076
2077    Output:
2078    COND_EXPR - conditional expression.
2079
2080    The returned COND_EXPR is the conditional expression to be used in the if
2081    statement that controls which version of the loop gets executed at runtime.
2082 */
2083
2084 void
2085 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2086 {
2087   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2088     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2089
2090   if (comp_alias_ddrs.is_empty ())
2091     return;
2092
2093   create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2094                                &comp_alias_ddrs, cond_expr);
2095   if (dump_enabled_p ())
2096     dump_printf_loc (MSG_NOTE, vect_location,
2097                      "created %u versioning for alias checks.\n",
2098                      comp_alias_ddrs.length ());
2099 }
2100
2101
2102 /* Function vect_loop_versioning.
2103
2104    If the loop has data references that may or may not be aligned or/and
2105    has data reference relations whose independence was not proven then
2106    two versions of the loop need to be generated, one which is vectorized
2107    and one which isn't.  A test is then generated to control which of the
2108    loops is executed.  The test checks for the alignment of all of the
2109    data references that may or may not be aligned.  An additional
2110    sequence of runtime tests is generated for each pairs of DDRs whose
2111    independence was not proven.  The vectorized version of loop is
2112    executed only if both alias and alignment tests are passed.
2113
2114    The test generated to check which version of loop is executed
2115    is modified to also check for profitability as indicated by the
2116    cost model threshold TH.
2117
2118    The versioning precondition(s) are placed in *COND_EXPR and
2119    *COND_EXPR_STMT_LIST.  */
2120
2121 void
2122 vect_loop_versioning (loop_vec_info loop_vinfo,
2123                       unsigned int th, bool check_profitability)
2124 {
2125   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2126   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2127   basic_block condition_bb;
2128   gphi_iterator gsi;
2129   gimple_stmt_iterator cond_exp_gsi;
2130   basic_block merge_bb;
2131   basic_block new_exit_bb;
2132   edge new_exit_e, e;
2133   gphi *orig_phi, *new_phi;
2134   tree cond_expr = NULL_TREE;
2135   gimple_seq cond_expr_stmt_list = NULL;
2136   tree arg;
2137   profile_probability prob = profile_probability::likely ();
2138   gimple_seq gimplify_stmt_list = NULL;
2139   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2140   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2141   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2142   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2143
2144   if (check_profitability)
2145     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2146                              build_int_cst (TREE_TYPE (scalar_loop_iters),
2147                                                        th));
2148
2149   if (version_niter)
2150     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2151
2152   if (cond_expr)
2153     cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2154                                         is_gimple_condexpr, NULL_TREE);
2155
2156   if (version_align)
2157     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2158                                        &cond_expr_stmt_list);
2159
2160   if (version_alias)
2161     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2162
2163   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2164                                       is_gimple_condexpr, NULL_TREE);
2165   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2166
2167   initialize_original_copy_tables ();
2168   if (scalar_loop)
2169     {
2170       edge scalar_e;
2171       basic_block preheader, scalar_preheader;
2172
2173       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2174          scale LOOP's frequencies instead.  */
2175       nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2176                             prob, prob.invert (), prob, prob.invert (), true);
2177       scale_loop_frequencies (loop, prob);
2178       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2179          while we need to move it above LOOP's preheader.  */
2180       e = loop_preheader_edge (loop);
2181       scalar_e = loop_preheader_edge (scalar_loop);
2182       gcc_assert (empty_block_p (e->src)
2183                   && single_pred_p (e->src));
2184       gcc_assert (empty_block_p (scalar_e->src)
2185                   && single_pred_p (scalar_e->src));
2186       gcc_assert (single_pred_p (condition_bb));
2187       preheader = e->src;
2188       scalar_preheader = scalar_e->src;
2189       scalar_e = find_edge (condition_bb, scalar_preheader);
2190       e = single_pred_edge (preheader);
2191       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2192                                       scalar_preheader);
2193       redirect_edge_and_branch_force (scalar_e, preheader);
2194       redirect_edge_and_branch_force (e, condition_bb);
2195       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2196                                single_pred (condition_bb));
2197       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2198                                single_pred (scalar_preheader));
2199       set_immediate_dominator (CDI_DOMINATORS, preheader,
2200                                condition_bb);
2201     }
2202   else
2203     nloop = loop_version (loop, cond_expr, &condition_bb,
2204                           prob, prob.invert (), prob, prob.invert (), true);
2205
2206   if (version_niter)
2207     {
2208       /* The versioned loop could be infinite, we need to clear existing
2209          niter information which is copied from the original loop.  */
2210       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2211       vect_free_loop_info_assumptions (nloop);
2212       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
2213       loop_constraint_set (loop, LOOP_C_INFINITE);
2214     }
2215
2216   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2217       && dump_enabled_p ())
2218     {
2219       if (version_alias)
2220         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2221                          "loop versioned for vectorization because of "
2222                          "possible aliasing\n");
2223       if (version_align)
2224         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2225                          "loop versioned for vectorization to enhance "
2226                          "alignment\n");
2227
2228     }
2229   free_original_copy_tables ();
2230
2231   /* Loop versioning violates an assumption we try to maintain during
2232      vectorization - that the loop exit block has a single predecessor.
2233      After versioning, the exit block of both loop versions is the same
2234      basic block (i.e. it has two predecessors). Just in order to simplify
2235      following transformations in the vectorizer, we fix this situation
2236      here by adding a new (empty) block on the exit-edge of the loop,
2237      with the proper loop-exit phis to maintain loop-closed-form.
2238      If loop versioning wasn't done from loop, but scalar_loop instead,
2239      merge_bb will have already just a single successor.  */
2240
2241   merge_bb = single_exit (loop)->dest;
2242   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2243     {
2244       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2245       new_exit_bb = split_edge (single_exit (loop));
2246       new_exit_e = single_exit (loop);
2247       e = EDGE_SUCC (new_exit_bb, 0);
2248
2249       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2250         {
2251           tree new_res;
2252           orig_phi = gsi.phi ();
2253           new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2254           new_phi = create_phi_node (new_res, new_exit_bb);
2255           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2256           add_phi_arg (new_phi, arg, new_exit_e,
2257                        gimple_phi_arg_location_from_edge (orig_phi, e));
2258           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2259         }
2260     }
2261
2262   /* End loop-exit-fixes after versioning.  */
2263
2264   if (cond_expr_stmt_list)
2265     {
2266       cond_exp_gsi = gsi_last_bb (condition_bb);
2267       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2268                              GSI_SAME_STMT);
2269     }
2270   update_ssa (TODO_update_ssa);
2271 }