gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "tree.h"
  28 #include "basic-block.h"
  29 #include "gimple-pretty-print.h"
  30 #include "tree-ssa-alias.h"
  31 #include "internal-fn.h"
  32 #include "gimple-expr.h"
  33 #include "is-a.h"
  34 #include "gimple.h"
  35 #include "gimplify.h"
  36 #include "gimple-iterator.h"
  37 #include "gimplify-me.h"
  38 #include "gimple-ssa.h"
  39 #include "tree-cfg.h"
  40 #include "tree-phinodes.h"
  41 #include "ssa-iterators.h"
  42 #include "stringpool.h"
  43 #include "tree-ssanames.h"
  44 #include "tree-ssa-loop-manip.h"
  45 #include "tree-into-ssa.h"
  46 #include "tree-ssa.h"
  47 #include "tree-pass.h"
  48 #include "cfgloop.h"
  49 #include "diagnostic-core.h"
  50 #include "tree-scalar-evolution.h"
  51 #include "tree-vectorizer.h"
  52 #include "langhooks.h"
  53
  54 /*************************************************************************
  55   Simple Loop Peeling Utilities
  56
  57   Utilities to support loop peeling for vectorization purposes.
  58  *************************************************************************/
  59
  60
  61 /* Replace the SSA name used at *OP_P with its recorded current
  62    definition, if it has one.  Operands that are not SSA names are
  63    left untouched.  */
  64
  65 static void
  66 rename_use_op (use_operand_p op_p)
  67 {
  68   tree old_name = USE_FROM_PTR (op_p);
  69
  70   /* Operands that are not SSA names need no renaming.  */
  71   if (TREE_CODE (old_name) != SSA_NAME)
  72     return;
  73
  74   tree replacement = get_current_def (old_name);
  75
  76   /* A name with a current definition is redirected to it; a name
  77      without one (something defined outside of the loop) is kept.  */
  78   if (replacement)
  79     SET_USE (op_p, replacement);
  80 }
  81
  82
  83 /* Rename every SSA use in basic block BB via rename_use_op: the
  84    operands of each statement, plus the PHI arguments on predecessor
  85    edges whose source lies inside BB's loop.  */
  86
  87 static void
  88 rename_variables_in_bb (basic_block bb)
  89 {
  90   struct loop *father = bb->loop_father;
  91   edge_iterator pred_it;
  92   edge pred;
  93
  94   /* Ordinary statements: rename all of their use operands.  */
  95   for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
  96        gsi_next (&si))
  97     {
  98       ssa_op_iter op_it;
  99       use_operand_p use;
 100
 101       FOR_EACH_SSA_USE_OPERAND (use, gsi_stmt (si), op_it, SSA_OP_ALL_USES)
 102         rename_use_op (use);
 103     }
 104
 105   /* PHI arguments: only edges that originate inside the loop count.  */
 106   FOR_EACH_EDGE (pred, pred_it, bb->preds)
 107     if (flow_bb_inside_loop_p (father, pred->src))
 108       for (gphi_iterator pi = gsi_start_phis (bb); !gsi_end_p (pi);
 109            gsi_next (&pi))
 110         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (pi.phi (), pred));
 111 }
 112
 113
 114 /* A deferred debug-stmt adjustment; see adjust_debug_stmts_now.  */
 115 typedef struct
 116 {
 117   tree from;
 118   tree to;
 119   basic_block bb;
 120 } adjust_info;
 121
 122 /* Stack of pending adjustments.  It is drained LIFO so that the
 123    closest substitution applies; FIFO order could substitute a PHI def
 124    that soon stops dominating the use, leaving nothing to substitute
 125    once the suitable entry is reached.  */
 126 static vec<adjust_info, va_heap> adjust_vec;
 127
 128 /* Apply adjustment AI now.  Debug binds using AI->from that are in or
 129    dominated by AI->bb, but not in or dominated by the block defining
 130    AI->from, are changed to use AI->to, or reset if AI->to is null.  */
 131
 132 static void
 133 adjust_debug_stmts_now (adjust_info *ai)
 134 {
 135   tree old_name = ai->from;
 136   tree replacement = ai->to;
 137   basic_block phi_bb = ai->bb;
 138   basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (old_name));
 139   imm_use_iterator use_it;
 140   use_operand_p use;
 141   gimple use_stmt;
 142
 143   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 144
 145   FOR_EACH_IMM_USE_STMT (use_stmt, use_it, old_name)
 146     {
 147       /* Only debug stmts are adjusted here.  */
 148       if (!is_gimple_debug (use_stmt))
 149         continue;
 150
 151       gcc_assert (gimple_debug_bind_p (use_stmt));
 152       basic_block use_bb = gimple_bb (use_stmt);
 153
 154       /* Skip uses that are neither in nor dominated by PHI_BB.  */
 155       if (use_bb != phi_bb
 156           && !dominated_by_p (CDI_DOMINATORS, use_bb, phi_bb))
 157         continue;
 158
 159       /* Skip uses in or dominated by the defining block.  */
 160       if (use_bb == def_bb
 161           || dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
 162         continue;
 163
 164       if (replacement)
 165         FOR_EACH_IMM_USE_ON_STMT (use, use_it)
 166           SET_USE (use, replacement);
 167       else
 168         {
 169           gimple_debug_bind_reset_value (use_stmt);
 170           update_stmt (use_stmt);
 171         }
 172     }
 173 }
 174
 175 /* Perform every adjustment queued in adjust_vec, most recently pushed
 176    first, then release the vector.  Does nothing when debug stmts are
 177    not in use.  */
 178
 179 static void
 180 adjust_vec_debug_stmts (void)
 181 {
 182   if (MAY_HAVE_DEBUG_STMTS)
 183     {
 184       gcc_assert (adjust_vec.exists ());
 185
 186       /* Process the top entry, then pop it, until none remain.  */
 187       for (; !adjust_vec.is_empty (); adjust_vec.pop ())
 188         adjust_debug_stmts_now (&adjust_vec.last ());
 189
 190       adjust_vec.release ();
 191     }
 192 }
 193
 194 /* Request that debug stmts referencing FROM be changed to use the
 195    loop-closed TO where they are dominated by BB and not by FROM's
 196    definition.  If adjust_vec exists the request is queued until
 197    adjust_vec_debug_stmts is called; otherwise it is applied now.  */
 198
 199 static void
 200 adjust_debug_stmts (tree from, tree to, basic_block bb)
 201 {
 202   /* Nothing to do for non-SSA, default-def or virtual operands.  */
 203   if (!MAY_HAVE_DEBUG_STMTS
 204       || TREE_CODE (from) != SSA_NAME
 205       || SSA_NAME_IS_DEFAULT_DEF (from)
 206       || virtual_operand_p (from))
 207     return;
 208
 209   adjust_info request;
 210   request.from = from;
 211   request.to = to;
 212   request.bb = bb;
 213
 214   if (!adjust_vec.exists ())
 215     adjust_debug_stmts_now (&request);
 216   else
 217     adjust_vec.safe_push (request);
 218 }
 219
 220 /* Make NEW_DEF the argument of UPDATE_PHI on edge E.  If debug stmts
 221    may exist, also ask for those that referenced the previous
 222    argument to be adjusted to use UPDATE_PHI's result.  */
 223
 224 static void
 225 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 226 {
 227   /* Remember the old argument before overwriting it.  */
 228   tree previous_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 229   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 230
 231   if (!MAY_HAVE_DEBUG_STMTS)
 232     return;
 233   adjust_debug_stmts (previous_arg, PHI_RESULT (update_phi),
 234                       gimple_bb (update_phi));
 235 }
 236
 237
 238 /* Update PHI nodes for a guard of the LOOP.
 239
 240    Input:
 241    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 242         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 243         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 244         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 245         We denote this bb NEW_MERGE_BB because before the guard code was added
 246         it had a single predecessor (the LOOP header), and now it became a merge
 247         point of two paths - the path that ends with the LOOP exit-edge, and
 248         the path that ends with GUARD_EDGE.
 249    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 250         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 251
 252    ===> The CFG before the guard-code was added:
 253         LOOP_header_bb:
 254           loop_body
 255           if (exit_loop) goto update_bb
 256           else           goto LOOP_header_bb
 257         update_bb:
 258
 259    ==> The CFG after the guard-code was added:
 260         guard_bb:
 261           if (LOOP_guard_condition) goto new_merge_bb
 262           else                      goto LOOP_header_bb
 263         LOOP_header_bb:
 264           loop_body
 265           if (exit_loop_condition) goto new_merge_bb
 266           else                     goto LOOP_header_bb
 267         new_merge_bb:
 268           goto update_bb
 269         update_bb:
 270
 271    ==> The CFG after this function:
 272         guard_bb:
 273           if (LOOP_guard_condition) goto new_merge_bb
 274           else                      goto LOOP_header_bb
 275         LOOP_header_bb:
 276           loop_body
 277           if (exit_loop_condition) goto new_exit_bb
 278           else                     goto LOOP_header_bb
 279         new_exit_bb:
 280         new_merge_bb:
 281           goto update_bb
 282         update_bb:
 283
 284    This function:
 285    1. creates and updates the relevant phi nodes to account for the new
 286       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 287       1.1. Create phi nodes at NEW_MERGE_BB.
 288       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 289            UPDATE_BB).  UPDATE_BB was LOOP's exit-bb before the guard was added.
 290    2. preserves loop-closed-ssa-form by creating the required phi nodes
 291       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 292
 293    There are two flavors to this function:
 294
 295    slpeel_update_phi_nodes_for_guard1:
 296      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 297      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 298      for variables that have phis in the loop header.
 299
 300    slpeel_update_phi_nodes_for_guard2:
 301      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 302      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 303      for variables that have phis in the loop exit.
 304
 305    I.E., the overall structure is:
 306
 307         loop1_preheader_bb:
 308                 guard1 (goto loop1/merge1_bb)
 309         loop1
 310         loop1_exit_bb:
 311                 guard2 (goto merge1_bb/merge2_bb)
 312         merge1_bb
 313         loop2
 314         loop2_exit_bb
 315         merge2_bb
 316         next_bb
 317
 318    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 319    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 320    that have phis in loop1->header).
 321
 322    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 323    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 324    that have phis in next_bb). It also adds some of these phis to
 325    loop1_exit_bb.
 326
 327    slpeel_update_phi_nodes_for_guard1 is always called before
 328    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 329    to create correct data-flow and loop-closed-ssa-form.
 330
 331    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 332    that change between iterations of a loop (and therefore have a phi-node
 333    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 334    phis for variables that are used out of the loop (and therefore have
 335    loop-closed exit phis). Some variables may be both updated between
 336    iterations and used after the loop. This is why in loop1_exit_bb we
 337    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 338    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 339
 340    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 341      an original loop. i.e., we have:
 342
 343            orig_loop
 344            guard_bb (goto LOOP/new_merge)
 345            new_loop <-- LOOP
 346            new_exit
 347            new_merge
 348            next_bb
 349
 350      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 351      have:
 352
 353            new_loop
 354            guard_bb (goto LOOP/new_merge)
 355            orig_loop <-- LOOP
 356            new_exit
 357            new_merge
 358            next_bb
 359
 360      The SSA names defined in the original loop have a current
 361      reaching definition that records the corresponding new
 362      ssa-name used in the new duplicated loop copy.
 363   */
 364
 365 /* Function slpeel_update_phi_nodes_for_guard1
 366
 367    Input:
 368    - GUARD_EDGE, LOOP, IS_NEW_LOOP - as explained above.
 369    - NEW_EXIT_BB - output; *NEW_EXIT_BB is set to the new block created
 370      by splitting the single exit edge of LOOP.
 371
 372    In the context of the overall structure, we have:
 373
 374         loop1_preheader_bb:
 375                 guard1 (goto loop1/merge1_bb)
 376 LOOP->  loop1
 377         loop1_exit_bb:
 378                 guard2 (goto merge1_bb/merge2_bb)
 379         merge1_bb
 380         loop2
 381         loop2_exit_bb
 382         merge2_bb
 383         next_bb
 384
 385    For each name updated between loop iterations (i.e - for each name that has
 386    an entry (loop-header) phi in LOOP) we create a new phi in:
 387    1. merge1_bb (to account for the edge from guard1)
 388    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 389 */
 390
 391 static void
 392 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 393                                     bool is_new_loop, basic_block *new_exit_bb)
 394 {
 395   gphi *orig_phi, *new_phi;
 396   gphi *update_phi, *update_phi2;
 397   tree guard_arg, loop_arg;
 398   basic_block new_merge_bb = guard_edge->dest;
 399   edge e = EDGE_SUCC (new_merge_bb, 0);
 400   basic_block update_bb = e->dest;
 401   basic_block orig_bb = loop->header;
 402   edge new_exit_e;
 403   tree current_new_name;
 404   gphi_iterator gsi_orig, gsi_update;
 405
 406   /* Create new bb between loop and new_merge_bb.  */
 407   *new_exit_bb = split_edge (single_exit (loop));
 408
 409   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 410   /* Walk the PHIs of LOOP's header and of UPDATE_BB in lockstep.  */
 411   for (gsi_orig = gsi_start_phis (orig_bb),
 412        gsi_update = gsi_start_phis (update_bb);
 413        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 414        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 415     {
 416       source_location loop_locus, guard_locus;
 417       tree new_res;
 418       orig_phi = gsi_orig.phi ();
 419       update_phi = gsi_update.phi ();
 420
 421       /** 1. Handle new-merge-point phis  **/
 422
 423       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 424       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 425       new_phi = create_phi_node (new_res, new_merge_bb);
 426
 427       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 428             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 429       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 430       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 431                                                       EDGE_SUCC (loop->latch,
 432                                                                  0));
 433       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 434       guard_locus
 435         = gimple_phi_arg_location_from_edge (orig_phi,
 436                                              loop_preheader_edge (loop));
 437
 438       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 439       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 440
 441       /* 1.3. Update phi in successor block.  */
 442       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 443                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 444       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 445       update_phi2 = new_phi;
 446       /* The merge-bb PHI is updated again in step 2.3 below.  */
 447
 448       /** 2. Handle loop-closed-ssa-form phis  **/
 449       /* No exit PHI is created for virtual operands.  */
 450       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 451         continue;
 452
 453       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 454       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 455       new_phi = create_phi_node (new_res, *new_exit_bb);
 456
 457       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 458       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 459
 460       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 461       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 462       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 463                                   PHI_RESULT (new_phi));
 464
 465       /* 2.4. Record the newly created name with set_current_def.
 466          We want to find a name such that
 467                 name = get_current_def (orig_loop_name)
 468          and to set its current definition as follows:
 469                 set_current_def (name, new_phi_name)
 470
 471          If LOOP is a new loop then loop_arg is already the name we're
 472          looking for. If LOOP is the original loop, then loop_arg is
 473          the orig_loop_name and the relevant name is recorded in its
 474          current reaching definition.  */
 475       if (is_new_loop)
 476         current_new_name = loop_arg;
 477       else
 478         {
 479           current_new_name = get_current_def (loop_arg);
 480           /* current_def is not available only if the variable does not
 481              change inside the loop, in which case we also don't care
 482              about recording a current_def for it because we won't be
 483              trying to create loop-exit-phis for it.  */
 484           if (!current_new_name)
 485             continue;
 486         }
 487       tree new_name = get_current_def (current_new_name);
 488       /* Because of peeled_chrec optimization it is possible that we have
 489          set this earlier.  Verify the PHI has the same value.  */
 490       if (new_name)
 491         {
 492           gimple phi = SSA_NAME_DEF_STMT (new_name);
 493           gcc_assert (gimple_code (phi) == GIMPLE_PHI
 494                       && gimple_bb (phi) == *new_exit_bb
 495                       && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
 496                           == loop_arg));
 497           continue;
 498         }
 499
 500       set_current_def (current_new_name, PHI_RESULT (new_phi));
 501     }
 502 }
 503
 504
 505 /* Function slpeel_update_phi_nodes_for_guard2
 506
 507    Input:
 508    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 509      *NEW_EXIT_BB is set to the block created by splitting LOOP's exit.
 510    In the context of the overall structure, we have:
 511
 512         loop1_preheader_bb:
 513                 guard1 (goto loop1/merge1_bb)
 514         loop1
 515         loop1_exit_bb:
 516                 guard2 (goto merge1_bb/merge2_bb)
 517         merge1_bb
 518 LOOP->  loop2
 519         loop2_exit_bb
 520         merge2_bb
 521         next_bb
 522
 523    For each name used outside the loop (i.e - for each name that has an exit
 524    phi in next_bb) we create a new phi in:
 525    1. merge2_bb (to account for the edge from guard_bb)
 526    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 527    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 528       if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
 529 */
 530
 531 static void
 532 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 533                                     bool is_new_loop, basic_block *new_exit_bb)
 534 {
 535   gphi *orig_phi, *new_phi;
 536   gphi *update_phi, *update_phi2;
 537   tree guard_arg, loop_arg;
 538   basic_block new_merge_bb = guard_edge->dest;
 539   edge e = EDGE_SUCC (new_merge_bb, 0);
 540   basic_block update_bb = e->dest;
 541   edge new_exit_e;
 542   tree orig_def, orig_def_new_name;
 543   tree new_name, new_name2;
 544   tree arg;
 545   gphi_iterator gsi;
 546
 547   /* Create new bb between loop and new_merge_bb.  */
 548   *new_exit_bb = split_edge (single_exit (loop));
 549
 550   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 551   /* Visit each PHI of UPDATE_BB.  */
 552   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 553     {
 554       tree new_res;
 555       update_phi = gsi.phi ();
 556       orig_phi = update_phi;
 557       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 558       /* This loop-closed-phi actually doesn't represent a use
 559          out of the loop - the phi arg is a constant.  */
 560       if (TREE_CODE (orig_def) != SSA_NAME)
 561         continue;
 562       orig_def_new_name = get_current_def (orig_def);
 563       arg = NULL_TREE;
 564
 565       /** 1. Handle new-merge-point phis  **/
 566
 567       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 568       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 569       new_phi = create_phi_node (new_res, new_merge_bb);
 570
 571       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 572             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 573       new_name = orig_def;
 574       new_name2 = NULL_TREE;
 575       if (orig_def_new_name)
 576         {
 577           new_name = orig_def_new_name;
 578           /* Some variables have both loop-entry-phis and loop-exit-phis.
 579              Such variables were given yet newer names by phis placed in
 580              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 581              new_name2 = get_current_def (get_current_def (orig_name)).  */
 582           new_name2 = get_current_def (new_name);
 583         }
 584
 585       if (is_new_loop)
 586         {
 587           guard_arg = orig_def;
 588           loop_arg = new_name;
 589         }
 590       else
 591         {
 592           guard_arg = new_name;
 593           loop_arg = orig_def;
 594         }
 595       if (new_name2)
 596         guard_arg = new_name2;
 597
 598       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 599       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 600
 601       /* 1.3. Update phi in successor block.  */
 602       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 603       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 604       update_phi2 = new_phi;
 605       /* The merge-bb PHI is updated again in steps 2.3 and 3.4.  */
 606
 607       /** 2. Handle loop-closed-ssa-form phis  **/
 608
 609       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 610       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 611       new_phi = create_phi_node (new_res, *new_exit_bb);
 612
 613       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 614       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 615
 616       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 617       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 618       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 619                                   PHI_RESULT (new_phi));
 620
 621
 622       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 623
 624       /* 3.1. Find the relevant names that need an exit-phi in
 625          GUARD_BB, i.e. names for which
 626          slpeel_update_phi_nodes_for_guard1 had not already created a
 627          phi node. This is the case for names that are used outside
 628          the loop (and therefore need an exit phi) but are not updated
 629          across loop iterations (and therefore don't have a
 630          loop-header-phi).
 631
 632          slpeel_update_phi_nodes_for_guard1 is responsible for
 633          creating loop-exit phis in GUARD_BB for names that have a
 634          loop-header-phi.  When such a phi is created we also record
 635          the new name in its current definition.  If this new name
 636          exists, then guard_arg was set to this new name (see 1.2
 637          above).  Therefore, if guard_arg is not this new name, this
 638          is an indication that an exit-phi in GUARD_BB was not yet
 639          created, so we take care of it here.  */
 640       if (guard_arg == new_name2)
 641         continue;
 642       arg = guard_arg;
 643
 644       /* 3.2. Generate new phi node in GUARD_BB:  */
 645       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 646       new_phi = create_phi_node (new_res, guard_edge->src);
 647
 648       /* 3.3. GUARD_BB has one incoming edge:  */
 649       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 650       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 651                    UNKNOWN_LOCATION);
 652
 653       /* 3.4. Update phi in successor of GUARD_BB:  */
 654       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 655                                                                 == guard_arg);
 656       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 657                                   PHI_RESULT (new_phi));
 658     }
 659 }
 660
 661
 662 /* Make the LOOP iterate NITERS times.  A new IV starting at zero and
 663    stepping by one is created, and the exit test of LOOP is replaced by
 664    a comparison of that IV with NITERS; LOOP->nb_iterations is set too.
 665    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 666
 667 void
 668 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 669 {
 670   tree indx_before_incr, indx_after_incr;
 671   gcond *cond_stmt;
 672   gcond *orig_cond;
 673   edge exit_edge = single_exit (loop);
 674   gimple_stmt_iterator loop_cond_gsi;
 675   gimple_stmt_iterator incr_gsi;
 676   bool insert_after;
 677   tree init = build_int_cst (TREE_TYPE (niters), 0);
 678   tree step = build_int_cst (TREE_TYPE (niters), 1);
 679   source_location loop_loc;
 680   enum tree_code code;
 681
 682   orig_cond = get_loop_exit_condition (loop);
 683   gcc_assert (orig_cond);
 684   loop_cond_gsi = gsi_for_stmt (orig_cond);
 685   /* Create the counting IV: it starts at INIT (0) and steps by STEP (1).  */
 686   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 687   create_iv (init, step, NULL_TREE, loop,
 688              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 689
 690   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 691                                               true, NULL_TREE, true,
 692                                               GSI_SAME_STMT);
 693   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 694                                      true, GSI_SAME_STMT);
 695   /* Exit on GE when the exit edge is the true edge, else loop on LT.  */
 696   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 697   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 698                                  NULL_TREE);
 699
 700   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 701
 702   /* Remove old loop exit test:  */
 703   gsi_remove (&loop_cond_gsi, true);
 704   free_stmt_vec_info (orig_cond);
 705
 706   loop_loc = find_loop_location (loop);
 707   if (dump_enabled_p ())
 708     {
 709       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 710         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 711                      LOCATION_LINE (loop_loc));
 712       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 713       dump_printf (MSG_NOTE, "\n");
 714     }
 715   loop->nb_iterations = niters;
 716 }
 717
 718 /* Helper for slpeel_tree_duplicate_loop_to_edge_cfg.  Walk the PHIs
 719    of FROM->dest and TO->dest pairwise; whenever both arguments on
 720    FROM and TO are SSA names and the TO argument has no current_def
 721    yet, give it the current_def of the FROM argument.  */
 722
 723 static void
 724 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 725 {
 726   gimple_stmt_iterator src_it = gsi_start_phis (from->dest);
 727   gimple_stmt_iterator dst_it = gsi_start_phis (to->dest);
 728
 729   while (!gsi_end_p (src_it) && !gsi_end_p (dst_it))
 730     {
 731       tree src_arg = PHI_ARG_DEF_FROM_EDGE (gsi_stmt (src_it), from);
 732       tree dst_arg = PHI_ARG_DEF_FROM_EDGE (gsi_stmt (dst_it), to);
 733
 734       bool both_names = (TREE_CODE (src_arg) == SSA_NAME
 735                          && TREE_CODE (dst_arg) == SSA_NAME);
 736       if (both_names && get_current_def (dst_arg) == NULL_TREE)
 737         set_current_def (dst_arg, get_current_def (src_arg));
 738
 739       gsi_next (&src_it);
 740       gsi_next (&dst_it);
 741     }
 742 }
 743
 744
 745 /* Given LOOP this function generates a new copy of it and puts it
 746    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 747    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 748    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 749    entry or exit of LOOP.  */
 750
 751 struct loop *
 752 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 753                                         struct loop *scalar_loop, edge e)
 754 {
 755   struct loop *new_loop;
 756   basic_block *new_bbs, *bbs;
 757   bool at_exit;
 758   bool was_imm_dom;
 759   basic_block exit_dest;
 760   edge exit, new_exit;
 761
 762   exit = single_exit (loop);
 763   at_exit = (e == exit);
 764   if (!at_exit && e != loop_preheader_edge (loop))
 765     return NULL;
 766
 767   if (scalar_loop == NULL)
 768     scalar_loop = loop;
 769
 770   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 771   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 772
 773   /* Check whether duplication is possible.  */
 774   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
 775     {
 776       free (bbs);
 777       return NULL;
 778     }
 779
 780   /* Generate new loop structure.  */
 781   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 782   duplicate_subloops (scalar_loop, new_loop);
 783
 784   exit_dest = exit->dest;
 785   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 786                                           exit_dest) == loop->header ?
 787                  true : false);
 788
 789   /* Also copy the pre-header, this avoids jumping through hoops to
 790      duplicate the loop entry PHI arguments.  Create an empty
 791      pre-header unconditionally for this.  */
 792   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 793   edge entry_e = single_pred_edge (preheader);
 794   bbs[scalar_loop->num_nodes] = preheader;
 795   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 796
 797   exit = single_exit (scalar_loop);
 798   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 799             &exit, 1, &new_exit, NULL,
 800             e->src, true);
 801   exit = single_exit (loop);
 802   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 803
 804   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 805
 806   if (scalar_loop != loop)
 807     {
 808       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 809          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 810          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 811          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 812          header) to have current_def set, so copy them over.  */
 813       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 814                                                 exit);
 815       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 816                                                            0),
 817                                                 EDGE_SUCC (loop->latch, 0));
 818     }
 819
 820   if (at_exit) /* Add the loop copy at exit.  */
 821     {
 822       if (scalar_loop != loop)
 823         {
 824           gphi_iterator gsi;
 825           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 826
 827           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 828                gsi_next (&gsi))
 829             {
 830               gphi *phi = gsi.phi ();
 831               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 832               location_t orig_locus
 833                 = gimple_phi_arg_location_from_edge (phi, e);
 834
 835               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 836             }
 837         }
 838       redirect_edge_and_branch_force (e, new_preheader);
 839       flush_pending_stmts (e);
 840       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 841       if (was_imm_dom)
 842         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 843
 844       /* And remove the non-necessary forwarder again.  Keep the other
 845          one so we have a proper pre-header for the loop at the exit edge.  */
 846       redirect_edge_pred (single_succ_edge (preheader),
 847                           single_pred (preheader));
 848       delete_basic_block (preheader);
 849       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 850                                loop_preheader_edge (scalar_loop)->src);
 851     }
 852   else /* Add the copy at entry.  */
 853     {
 854       if (scalar_loop != loop)
 855         {
 856           /* Remove the non-necessary forwarder of scalar_loop again.  */
 857           redirect_edge_pred (single_succ_edge (preheader),
 858                               single_pred (preheader));
 859           delete_basic_block (preheader);
 860           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 861                                    loop_preheader_edge (scalar_loop)->src);
 862           preheader = split_edge (loop_preheader_edge (loop));
 863           entry_e = single_pred_edge (preheader);
 864         }
 865
 866       redirect_edge_and_branch_force (entry_e, new_preheader);
 867       flush_pending_stmts (entry_e);
 868       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 869
 870       redirect_edge_and_branch_force (new_exit, preheader);
 871       flush_pending_stmts (new_exit);
 872       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 873
 874       /* And remove the non-necessary forwarder again.  Keep the other
 875          one so we have a proper pre-header for the loop at the exit edge.  */
 876       redirect_edge_pred (single_succ_edge (new_preheader),
 877                           single_pred (new_preheader));
 878       delete_basic_block (new_preheader);
 879       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 880                                loop_preheader_edge (new_loop)->src);
 881     }
 882
 883   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 884     rename_variables_in_bb (new_bbs[i]);
 885
 886   if (scalar_loop != loop)
 887     {
 888       /* Update new_loop->header PHIs, so that on the preheader
 889          edge they are the ones from loop rather than scalar_loop.  */
 890       gphi_iterator gsi_orig, gsi_new;
 891       edge orig_e = loop_preheader_edge (loop);
 892       edge new_e = loop_preheader_edge (new_loop);
 893
 894       for (gsi_orig = gsi_start_phis (loop->header),
 895            gsi_new = gsi_start_phis (new_loop->header);
 896            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 897            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 898         {
 899           gphi *orig_phi = gsi_orig.phi ();
 900           gphi *new_phi = gsi_new.phi ();
 901           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 902           location_t orig_locus
 903             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 904
 905           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 906         }
 907     }
 908
 909   free (new_bbs);
 910   free (bbs);
 911
 912 #ifdef ENABLE_CHECKING
 913   verify_dominators (CDI_DOMINATORS);
 914 #endif
 915
 916   return new_loop;
 917 }
 918
 919
 920 /* Given the condition COND, put a conditional jump on it as the last
 921    statement of GUARD_BB; EXIT_BB is the basic block reached when the
 922    guarded code is skipped.  Assumes that this is the single exit of the
 923    guarded loop.

        COND is first gimplified into a valid condition operand.  Statements
        produced by that are appended to COND_EXPR_STMT_LIST, and the
        resulting list is inserted after the last statement of GUARD_BB,
        ahead of the new conditional.

        Successor edge 0 of GUARD_BB becomes the false edge of the new
        conditional, and a new true edge from GUARD_BB to EXIT_BB is created.
        PROBABILITY is given to the new edge and its inverse to the old one;
        the execution count of the old edge is split accordingly.  DOM_BB is
        recorded as the immediate dominator of EXIT_BB.

        Returns the new (skip) edge.  */
 924
 925 static edge
 926 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 927                        gimple_seq cond_expr_stmt_list,
 928                        basic_block exit_bb, basic_block dom_bb,
 929                        int probability)
 930 {
 931   gimple_stmt_iterator gsi;
 932   edge new_e, enter_e;
 933   gcond *cond_stmt;
 934   gimple_seq gimplify_stmt_list = NULL;
 935
       /* The edge GUARD_BB already has is taken when COND is false.  */
 936   enter_e = EDGE_SUCC (guard_bb, 0);
 937   enter_e->flags &= ~EDGE_FALLTHRU;
 938   enter_e->flags |= EDGE_FALSE_VALUE;
 939   gsi = gsi_last_bb (guard_bb);
 940
 941   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 942                                  NULL_TREE);
 943   if (gimplify_stmt_list)
 944     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 945   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 946   if (cond_expr_stmt_list)
 947     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 948
       /* Re-fetch the end of the block so the conditional lands after any
          statements inserted just above.  */
 949   gsi = gsi_last_bb (guard_bb);
 950   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 951
 952   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 953   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 954
       /* Split the count of ENTER_E between the two outgoing edges according
          to PROBABILITY.  */
 956   new_e->probability = probability;
 957   new_e->count = apply_probability (enter_e->count, probability);
 958   enter_e->count -= new_e->count;
 959   enter_e->probability = inverse_probability (probability);
 960   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 961   return new_e;
 962 }
 963
 964
 965 /* This function verifies that the following restrictions apply to LOOP:
 966    (1) it is innermost, and is not the outermost pseudo-loop of the function
 967    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 968    (3) it has a single exit
 969    (4) its exit condition is the last stmt of the block the exit edge leaves
 970    (5) E is the entry/exit edge of LOOP.

        Returns true when all of them hold and false otherwise.  A loop
        without a single exit is rejected before the exit edge is looked at,
        so the function never dereferences a missing exit edge.
 971  */
 972
 973 bool
 974 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 975 {
 976   edge exit_e = single_exit (loop);

       /* EXIT_E->src is needed below; bail out first if there is no single
          exit to inspect.  */
       if (!exit_e)
         return false;

 977   edge entry_e = loop_preheader_edge (loop);
 978   gcond *orig_cond = get_loop_exit_condition (loop);
 979   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 980
 981   if (loop->inner
 982       /* All loops have an outer scope; the only case loop->outer is NULL is for
 983          the function itself.  */
 984       || !loop_outer (loop)
 985       || loop->num_nodes != 2
 986       || !empty_block_p (loop->latch)
 988       /* Verify that new loop exit condition can be trivially modified.  */
 989       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 990       || (e != exit_e && e != entry_e))
 991     return false;
 992
 993   return true;
 994 }
 995
 996 #ifdef ENABLE_CHECKING
     /* Assert that the CFG around two peeled loops has the expected shape.
        FIRST_LOOP and SECOND_LOOP are the two loops in execution order.

        Two properties are checked:
        - the destination block of FIRST_LOOP's single exit has exactly two
          successors;
        - the source block of SECOND_LOOP's preheader edge has exactly two
          predecessors, one being that exit destination and the other the
          source block of FIRST_LOOP's preheader edge.

        Only compiled when ENABLE_CHECKING is defined.  */
 997 static void
 998 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 999                                  struct loop *second_loop)
1000 {
1001   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1002   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1003   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1004
1005   /* A guard that controls whether the second_loop is to be executed or skipped
1006      is placed in first_loop->exit.  first_loop->exit therefore has two
1007      successors - one is the preheader of second_loop, and the other is a bb
1008      after second_loop.
1009    */
1010   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1011
1012   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1013         of second_loop.  */
1014
1015   /* The preheader of second_loop is expected to have two predecessors:
1016      first_loop->exit and the block that precedes first_loop.  The two
          predecessor edges may appear in either order.  */
1017
1018   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1019               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1020                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1021                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1022                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1023
1024   /* Verify that the other successor of first_loop->exit is after the
1025      second_loop.  */
1026   /* TODO */
1027 }
1028 #endif
1029
1030 /* Emit the run time cost model check ahead of the prologue loop.  If the
1031    check finds that the loop runs too few iterations for vectorization to
1032    be profitable, FIRST_NITERS is replaced by the full scalar iteration
1033    count, so that all the iterations are executed in the prologue loop.

        BB_BEFORE_FIRST_LOOP is the block in front of the prologue loop; it
        must have a single predecessor on entry.  Two new blocks are placed
        in front of it:

          cond_bb:  if (scalar_loop_iters <= TH) goto then_bb
                    else goto BB_BEFORE_FIRST_LOOP
          then_bb:  prologue_after_cost_adjust = scalar_loop_iters

        and a PHI in BB_BEFORE_FIRST_LOOP merges that value with the incoming
        *FIRST_NITERS.  On return *FIRST_NITERS is the result of that PHI.

        LOOP is only used to look up the unchanged scalar iteration count.
        PROBABILITY is assigned to the edge from cond_bb to then_bb, and its
        inverse to the edge that bypasses then_bb.  */
1034
1035 static void
1036 set_prologue_iterations (basic_block bb_before_first_loop,
1037                          tree *first_niters,
1038                          struct loop *loop,
1039                          unsigned int th,
1040                          int probability)
1041 {
1042   edge e;
1043   basic_block cond_bb, then_bb;
1044   tree var, prologue_after_cost_adjust_name;
1045   gimple_stmt_iterator gsi;
1046   gphi *newphi;
1047   edge e_true, e_false, e_fallthru;
1048   gcond *cond_stmt;
1049   gimple_seq stmts = NULL;
1050   tree cost_pre_condition = NULL_TREE;
1051   tree scalar_loop_iters =
1052     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1053
       /* Split the incoming edge twice: the first split yields COND_BB, the
          second one yields THEN_BB.  */
1054   e = single_pred_edge (bb_before_first_loop);
1055   cond_bb = split_edge (e);
1056
1057   e = single_pred_edge (bb_before_first_loop);
1058   then_bb = split_edge (e);
1059   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1060
       /* Add the edge that bypasses THEN_BB when the condition is false.  */
1061   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1062                                    EDGE_FALSE_VALUE);
1063   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1064
       /* The edge into THEN_BB becomes the true edge of the condition.  */
1065   e_true = EDGE_PRED (then_bb, 0);
1066   e_true->flags &= ~EDGE_FALLTHRU;
1067   e_true->flags |= EDGE_TRUE_VALUE;
1068
       /* Distribute the profile of COND_BB over the two outgoing edges and
          THEN_BB.  */
1069   e_true->probability = probability;
1070   e_false->probability = inverse_probability (probability);
1071   e_true->count = apply_probability (cond_bb->count, probability);
1072   e_false->count = cond_bb->count - e_true->count;
1073   then_bb->frequency = EDGE_FREQUENCY (e_true);
1074   then_bb->count = e_true->count;
1075
1076   e_fallthru = EDGE_SUCC (then_bb, 0);
1077   e_fallthru->count = then_bb->count;
1078
       /* Build "scalar_loop_iters <= TH" and end COND_BB with a conditional
          jump on it.  */
1079   gsi = gsi_last_bb (cond_bb);
1080   cost_pre_condition =
1081     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1082                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1083   cost_pre_condition =
1084     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1085                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
1086   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1087                                            NULL_TREE, NULL_TREE);
1088   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089
       /* Gimplify the scalar iteration count; any statements this needs are
          placed in THEN_BB.  */
1090   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091                         "prologue_after_cost_adjust");
1092   prologue_after_cost_adjust_name =
1093     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1094
1095   gsi = gsi_last_bb (then_bb);
1096   if (stmts)
1097     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1098
       /* Merge the two candidate iteration counts: the adjusted one arrives
          from THEN_BB, the original *FIRST_NITERS over the false edge.  */
1099   newphi = create_phi_node (var, bb_before_first_loop);
1100   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1101                UNKNOWN_LOCATION);
1102   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1103
1104   *first_niters = PHI_RESULT (newphi);
1105 }
1106
1107 /* Function slpeel_tree_peel_loop_to_edge.
1108
1109    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1110    that is placed on the entry (exit) edge E of LOOP. After this transformation
1111    we have two loops one after the other - first-loop iterates FIRST_NITERS
1112    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1113    If the cost model indicates that it is profitable to emit a scalar
1114    loop instead of the vector one, then the prolog (epilog) loop will iterate
1115    for the entire unchanged scalar iterations of the loop.
1116
1117    Input:
1118    - LOOP: the loop to be peeled.
1119    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1120         should be copied.
1121    - E: the exit or entry edge of LOOP.
1122         If it is the entry edge, we peel the first iterations of LOOP. In this
1123         case first-loop is the newly created loop, and second-loop is LOOP.
1124         If it is the exit edge, we peel the last iterations of LOOP. In this
1125         case, first-loop is LOOP, and second-loop is the newly created loop.
1126    - NITERS: the number of iterations that LOOP iterates.
1127    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
             May be replaced (see set_prologue_iterations) when
             CHECK_PROFITABILITY is set for prologue peeling.
1128    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1129         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1130         is false, the caller of this function may want to take care of this
1131         (this can be useful if we don't want new stmts added to first-loop).
             This flag also selects which first guard is built: the epilogue
             form when it is false, the prologue form when it is true.
1132    - TH: cost model profitability threshold of iterations for vectorization.
1133    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1134                           during versioning and hence needs to occur during
1135                           prologue generation or whether cost model check
1136                           has not occurred during prologue generation and hence
1137                           needs to occur during epilogue generation.
1138    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1139    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1140
1141
1142    Output:
1143    The function returns a pointer to the new loop-copy, or NULL if it failed
1144    to perform the transformation.
1145
1146    The function generates two if-then-else guards: one before the first loop,
1147    and the other before the second loop:
1148    The first guard is:
1149      prologue peeling: if (FIRST_NITERS <= 0) then skip the first loop,
1150      and go directly to the second loop.
          epilogue peeling: if (NITERSM1 < limit) then skip the first loop,
          where limit is VF - 1, plus one when peeling for gaps, and is raised
          to TH when CHECK_PROFITABILITY is set and TH is larger.
1151    The second guard is:
1152      if (FIRST_NITERS == NITERS) then skip the second loop.
1153
1154    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1155    then the generated condition is combined with COND_EXPR and the
1156    statements in COND_EXPR_STMT_LIST are emitted together with it.
        COND_EXPR is only consulted for epilogue peeling, where the first loop
        is also skipped when COND_EXPR is false.
1157
1158    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1159    FORNOW the resulting code will not be in loop-closed-ssa form.
1160 */
1161
1162 static struct loop *
1163 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1164                                edge e, tree *first_niters,
1165                                tree niters, bool update_first_loop_count,
1166                                unsigned int th, bool check_profitability,
1167                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1168                                int bound1, int bound2)
1169 {
1170   struct loop *new_loop = NULL, *first_loop, *second_loop;
1171   edge skip_e;
1172   tree pre_condition = NULL_TREE;
1173   basic_block bb_before_second_loop, bb_after_second_loop;
1174   basic_block bb_before_first_loop;
1175   basic_block bb_between_loops;
1176   basic_block new_exit_bb;
1177   gphi_iterator gsi;
1178   edge exit_e = single_exit (loop);
1179   source_location loop_loc;
1180   /* There are many aspects to how likely the first loop is going to be executed.
1181      Without histogram we can't really do good job.  Simply set it to
1182      2/3, so the first loop is not reordered to the end of function and
1183      the hot path through stays short.  */
1184   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1185   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1186   int probability_of_second_loop;
1187
1188   if (!slpeel_can_duplicate_loop_p (loop, e))
1189     return NULL;
1190
1191   /* We might have a queued need to update virtual SSA form.  As we
1192      delete the update SSA machinery below after doing a regular
1193      incremental SSA update during loop copying make sure we don't
1194      lose that fact.
1195      ???  Needing to update virtual SSA form by renaming is unfortunate
1196      but not all of the vectorizer code inserting new loads / stores
1197      properly assigns virtual operands to those statements.  */
1198   update_ssa (TODO_update_ssa_only_virtuals);
1199
1200   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1201      in the exit bb and rename all the uses after the loop.  This simplifies
1202      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1203      (but normally loop closed SSA form doesn't require virtual PHIs to be
1204      in the same form).  Doing this early simplifies the checking what
1205      uses should be renamed.
          Note that the inner scan below reuses GSI; that is fine because the
          outer loop is left with a break as soon as the first virtual PHI of
          the header has been handled.  */
1206   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1207     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1208       {
1209         gphi *phi = gsi.phi ();
1210         for (gsi = gsi_start_phis (exit_e->dest);
1211              !gsi_end_p (gsi); gsi_next (&gsi))
1212           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1213             break;
1214         if (gsi_end_p (gsi))
1215           {
1216             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1217             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1218             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1219             imm_use_iterator imm_iter;
1220             gimple stmt;
1221             use_operand_p use_p;
1222
1223             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1224             gimple_phi_set_result (new_phi, new_vop);
                 /* Redirect every use of the latch value except the new PHI
                    itself and statements in the loop header.  */
1225             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1226               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1227                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1228                   SET_USE (use_p, new_vop);
1229           }
1230         break;
1231       }
1232
1233   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1234         Resulting CFG would be:
1235
1236         first_loop:
1237         do {
1238         } while ...
1239
1240         second_loop:
1241         do {
1242         } while ...
1243
1244         orig_exit_bb:
1245    */
1246
1247   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1248                                                            e)))
1249     {
1250       loop_loc = find_loop_location (loop);
1251       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1252                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1253       return NULL;
1254     }
1255
1256   if (MAY_HAVE_DEBUG_STMTS)
1257     {
1258       gcc_assert (!adjust_vec.exists ());
1259       adjust_vec.create (32);
1260     }
1261
1262   if (e == exit_e)
1263     {
1264       /* NEW_LOOP was placed after LOOP.  */
1265       first_loop = loop;
1266       second_loop = new_loop;
1267     }
1268   else
1269     {
1270       /* NEW_LOOP was placed before LOOP.  */
1271       first_loop = new_loop;
1272       second_loop = loop;
1273     }
1274
1275   /* 2.  Add the guard code in one of the following ways:
1276
1277      2.a Add the guard that controls whether the first loop is executed.
1278          This occurs when this function is invoked for prologue or epilogue
1279          generation and when the cost model check can be done at compile time.
1280
1281          Resulting CFG would be:
1282
1283          bb_before_first_loop:
1284          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1285                                 GOTO first-loop
1286
1287          first_loop:
1288          do {
1289          } while ...
1290
1291          bb_before_second_loop:
1292
1293          second_loop:
1294          do {
1295          } while ...
1296
1297          orig_exit_bb:
1298
1299      2.b Add the cost model check that allows the prologue
1300          to iterate for the entire unchanged scalar
1301          iterations of the loop in the event that the cost
1302          model indicates that the scalar loop is more
1303          profitable than the vector one. This occurs when
1304          this function is invoked for prologue generation
1305          and the cost model check needs to be done at run
1306          time.
1307
1308          Resulting CFG after prologue peeling would be:
1309
1310          if (scalar_loop_iterations <= th)
1311            FIRST_NITERS = scalar_loop_iterations
1312
1313          bb_before_first_loop:
1314          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1315                                 GOTO first-loop
1316
1317          first_loop:
1318          do {
1319          } while ...
1320
1321          bb_before_second_loop:
1322
1323          second_loop:
1324          do {
1325          } while ...
1326
1327          orig_exit_bb:
1328
1329      2.c Add the cost model check that allows the epilogue
1330          to iterate for the entire unchanged scalar
1331          iterations of the loop in the event that the cost
1332          model indicates that the scalar loop is more
1333          profitable than the vector one. This occurs when
1334          this function is invoked for epilogue generation
1335          and the cost model check needs to be done at run
1336          time.  This check is combined with any pre-existing
1337          check in COND_EXPR to avoid versioning.
1338
1339          Resulting CFG after epilogue peeling would be:
1340
1341          bb_before_first_loop:
1342          if ((scalar_loop_iterations <= th)
1343              ||
1344              FIRST_NITERS == 0) GOTO bb_before_second_loop
1345                                 GOTO first-loop
1346
1347          first_loop:
1348          do {
1349          } while ...
1350
1351          bb_before_second_loop:
1352
1353          second_loop:
1354          do {
1355          } while ...
1356
1357          orig_exit_bb:
1358   */
1359
1360   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1361   /* Loop copying inserted a forwarder block for us here.  */
1362   bb_before_second_loop = single_exit (first_loop)->dest;
1363
       /* The second loop's preheader is reached either by skipping the first
          loop, or by running the first loop and then not skipping the second
          one.  */
1364   probability_of_second_loop = (inverse_probability (first_guard_probability)
1365                                 + combine_probabilities (second_guard_probability,
1366                                                          first_guard_probability));
1367   /* Theoretically preheader edge of first loop and exit edge should have
1368      same frequencies.  Loop exit probabilities are however easy to get wrong.
1369      It is safer to copy value from original loop entry.  */
1370   bb_before_second_loop->frequency
1371      = combine_probabilities (bb_before_first_loop->frequency,
1372                               probability_of_second_loop);
1373   bb_before_second_loop->count
1374      = apply_probability (bb_before_first_loop->count,
1375                           probability_of_second_loop);
1376   single_succ_edge (bb_before_second_loop)->count
1377      = bb_before_second_loop->count;
1378
1379   /* Epilogue peeling.  */
1380   if (!update_first_loop_count)
1381     {
1382       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1383       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
           /* Skip the first loop when NITERSM1 is below LIMIT, which starts as
              VF - 1 and may be raised for gap peeling and for the cost
              threshold.  */
1384       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1385       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1386         limit = limit + 1;
1387       if (check_profitability
1388           && th > limit)
1389         limit = th;
1390       pre_condition =
1391         fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1392                      build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1393       if (cond_expr)
1394         {
               /* Also skip the first loop when COND_EXPR does not hold.  */
1395           pre_condition =
1396             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1397                          pre_condition,
1398                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1399                                       cond_expr));
1400         }
1401     }
1402
1403   /* Prologue peeling.  */
1404   else
1405     {
1406       if (check_profitability)
1407         set_prologue_iterations (bb_before_first_loop, first_niters,
1408                                  loop, th, first_guard_probability);
1409
1410       pre_condition =
1411         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1412                      build_int_cst (TREE_TYPE (*first_niters), 0));
1413     }
1414
       /* The skip edge gets the inverse of FIRST_GUARD_PROBABILITY, i.e. the
          first loop itself is entered with FIRST_GUARD_PROBABILITY.  */
1415   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1416                                   cond_expr_stmt_list,
1417                                   bb_before_second_loop, bb_before_first_loop,
1418                                   inverse_probability (first_guard_probability));
       /* Use TH as the iteration bound of the first loop when the
          profitability check is done here and TH exceeds BOUND1.  */
1419   scale_loop_profile (first_loop, first_guard_probability,
1420                       check_profitability && (int)th > bound1 ? th : bound1);
1421   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1422                                       first_loop == new_loop,
1423                                       &new_exit_bb);
1424
1425
1426   /* 3. Add the guard that controls whether the second loop is executed.
1427         Resulting CFG would be:
1428
1429         bb_before_first_loop:
1430         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1431                                GOTO first-loop
1432
1433         first_loop:
1434         do {
1435         } while ...
1436
1437         bb_between_loops:
1438         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1439                                     GOTO bb_before_second_loop
1440
1441         bb_before_second_loop:
1442
1443         second_loop:
1444         do {
1445         } while ...
1446
1447         bb_after_second_loop:
1448
1449         orig_exit_bb:
1450    */
1451
       /* NEW_EXIT_BB was set by slpeel_update_phi_nodes_for_guard1 above; it
          hosts the second guard.  */
1452   bb_between_loops = new_exit_bb;
1453   bb_after_second_loop = split_edge (single_exit (second_loop));
1454
1455   pre_condition =
1456         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1457   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1458                                   bb_after_second_loop, bb_before_first_loop,
1459                                   inverse_probability (second_guard_probability));
1460   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1461   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1462                                      second_loop == new_loop, &new_exit_bb);
1463
1464   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1465    */
1466   if (update_first_loop_count)
1467     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1468
1469   delete_update_ssa ();
1470
1471   adjust_vec_debug_stmts ();
1472
1473   return new_loop;
1474 }
1475
1476 /* Function find_loop_location.
1477
1478    Extract the location of the loop in the source code.
1479    If the loop is not well formed for vectorization, an estimated
1480    location is calculated.

        The location of the loop exit condition is preferred.  If there is no
        exit condition, or its location is not past BUILTINS_LOCATION, the
        statements of the loop header are scanned in order and the first one
        with such a location is used.

1481    Return the loop location if succeed and UNKNOWN_LOCATION if not
        (including when LOOP is NULL or has no header).  */
1482
1483 source_location
1484 find_loop_location (struct loop *loop)
1485 {
1486   gimple stmt = NULL;
1487   basic_block bb;
1488   gimple_stmt_iterator si;
1489
1490   if (!loop)
1491     return UNKNOWN_LOCATION;
1492
1493   stmt = get_loop_exit_condition (loop);
1494
1495   if (stmt
1496       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1497     return gimple_location (stmt);
1498
1499   /* If we got here the loop is probably not "well formed",
1500      try to estimate the loop location */
1501
1502   if (!loop->header)
1503     return UNKNOWN_LOCATION;
1504
1505   bb = loop->header;
1506
       /* Take the first header statement that carries a usable location.  */
1507   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1508     {
1509       stmt = gsi_stmt (si);
1510       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1511         return gimple_location (stmt);
1512     }
1513
1514   return UNKNOWN_LOCATION;
1515 }
1516
1517
1518 /* Function vect_can_advance_ivs_p
1519
1520    In case the number of iterations that LOOP iterates is unknown at compile
1521    time, an epilog loop will be generated, and the loop induction variables
1522    (IVs) will be "advanced" to the value they are supposed to take just before
1523    the epilog loop.  Here we check that the access function of the loop IVs
1524    and the expression that represents the loop bound are simple enough.
1525    These restrictions will be relaxed in the future.

        The PHIs in the header of LOOP_VINFO's loop are examined one by one.
        Virtual PHIs and reduction PHIs are skipped.  The function returns
        false as soon as one of the remaining PHIs has no recorded evolution
        part, or has an evolution part that is itself a chrec; otherwise it
        returns true.  */
1526
1527 bool
1528 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1529 {
1530   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1531   basic_block bb = loop->header;
1532   gimple phi;
1533   gphi_iterator gsi;
1534
1535   /* Analyze phi functions of the loop header.  */
1536
1537   if (dump_enabled_p ())
1538     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1539   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1540     {
1541       tree evolution_part;
1542
1543       phi = gsi.phi ();
1544       if (dump_enabled_p ())
1545         {
1546           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1547           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1548           dump_printf (MSG_NOTE, "\n");
1549         }
1550
1551       /* Skip virtual phi's. The data dependences that are associated with
1552          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1553
1554       if (virtual_operand_p (PHI_RESULT (phi)))
1555         {
1556           if (dump_enabled_p ())
1557             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558                              "virtual phi. skip.\n");
1559           continue;
1560         }
1561
1562       /* Skip reduction phis.  */
1563
1564       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1565         {
1566           if (dump_enabled_p ())
1567             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1568                              "reduc phi. skip.\n");
1569           continue;
1570         }
1571
1572       /* Analyze the evolution function.  */
1573
1574       evolution_part
1575         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1576       if (evolution_part == NULL_TREE)
1577         {
               /* Unlike the other messages in this function, this one is
                  emitted without a location prefix.  */
1578           if (dump_enabled_p ())
1579             dump_printf (MSG_MISSED_OPTIMIZATION,
1580                          "No access function or evolution.\n");
1581           return false;
1582         }
1583
1584       /* FORNOW: We do not transform initial conditions of IVs
1585          which evolution functions are a polynomial of degree >= 2.
              This rejection is silent: nothing is written to the dump.  */
1586
1587       if (tree_is_chrec (evolution_part))
1588         return false;
1589     }
1590
1591   return true;
1592 }
1593
1594
1595 /*   Function vect_update_ivs_after_vectorizer.
1596
1597      "Advance" the induction variables of LOOP to the value they should take
1598      after the execution of LOOP.  This is currently necessary because the
1599      vectorizer does not handle induction variables that are used after the
1600      loop.  Such a situation occurs when the last iterations of LOOP are
1601      peeled, because:
1602      1. We introduced new uses after LOOP for IVs that were not originally used
1603         after LOOP: the IVs of LOOP are now used by an epilog loop.
1604      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1605         times, whereas the loop IVs should be bumped N times.
1606
1607      Input:
1608      - LOOP - a loop that is going to be vectorized. The last few iterations
1609               of LOOP were peeled.
1610      - NITERS - the number of iterations that LOOP executes (before it is
1611                 vectorized). i.e, the number of times the ivs should be bumped.
1612      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1613                   coming out from LOOP on which there are uses of the LOOP ivs
1614                   (this is the path from LOOP->exit to epilog_loop->preheader).
1615
1616                   The new definitions of the ivs are placed in LOOP->exit.
1617                   The phi args associated with the edge UPDATE_E in the bb
1618                   UPDATE_E->dest are updated accordingly.
1619
1620      Assumption 1: Like the rest of the vectorizer, this function assumes
1621      a single loop exit that has a single predecessor.
1622
1623      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1624      organized in the same order.
1625
1626      Assumption 3: The access function of the ivs is simple enough (see
1627      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1628
1629      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1630      coming out of LOOP on which the ivs of LOOP are used (this is the path
1631      that leads to the epilog loop; other paths skip the epilog loop).  This
1632      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1633      needs to have its phis updated.
1634  */
1635
1636 static void
1637 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1638                                   edge update_e)
1639 {
1640   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1641   basic_block exit_bb = single_exit (loop)->dest;
1642   gphi *phi, *phi1;
1643   gphi_iterator gsi, gsi1;
1644   basic_block update_bb = update_e->dest;
1645
1646   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1647
1648   /* Make sure there exists a single-predecessor exit bb:  */
1649   gcc_assert (single_pred_p (exit_bb));
1650
1651   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1652        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1653        gsi_next (&gsi), gsi_next (&gsi1))
1654     {
1655       tree init_expr;
1656       tree step_expr, off;
1657       tree type;
1658       tree var, ni, ni_name;
1659       gimple_stmt_iterator last_gsi;
1660       stmt_vec_info stmt_info;
1661
1662       phi = gsi.phi ();
1663       phi1 = gsi1.phi ();
1664       if (dump_enabled_p ())
1665         {
1666           dump_printf_loc (MSG_NOTE, vect_location,
1667                            "vect_update_ivs_after_vectorizer: phi: ");
1668           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1669           dump_printf (MSG_NOTE, "\n");
1670         }
1671
1672       /* Skip virtual phi's.  */
1673       if (virtual_operand_p (PHI_RESULT (phi)))
1674         {
1675           if (dump_enabled_p ())
1676             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1677                              "virtual phi. skip.\n");
1678           continue;
1679         }
1680
1681       /* Skip reduction phis.  */
1682       stmt_info = vinfo_for_stmt (phi);
1683       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1684         {
1685           if (dump_enabled_p ())
1686             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1687                              "reduc phi. skip.\n");
1688           continue;
1689         }
1690
1691       type = TREE_TYPE (gimple_phi_result (phi));
1692       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1693       step_expr = unshare_expr (step_expr);
1694
1695       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1696          of degree >= 2 or exponential.  */
1697       gcc_assert (!tree_is_chrec (step_expr));
1698
1699       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1700
1701       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1702                          fold_convert (TREE_TYPE (step_expr), niters),
1703                          step_expr);
1704       if (POINTER_TYPE_P (type))
1705         ni = fold_build_pointer_plus (init_expr, off);
1706       else
1707         ni = fold_build2 (PLUS_EXPR, type,
1708                           init_expr, fold_convert (type, off));
1709
1710       var = create_tmp_var (type, "tmp");
1711
1712       last_gsi = gsi_last_bb (exit_bb);
1713       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1714                                           true, GSI_SAME_STMT);
1715
1716       /* Fix phi expressions in the successor bb.  */
1717       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1718     }
1719 }
1720
1721 /* Function vect_do_peeling_for_loop_bound
1722
1723    Peel the last iterations of the loop represented by LOOP_VINFO.
1724    The peeled iterations form a new epilog loop.  Given that the loop now
1725    iterates NITERS times, the new epilog loop iterates
1726    NITERS % VECTORIZATION_FACTOR times.
1727
1728    The original loop will later be made to iterate
1729    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1730
1731    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1732    test.  */
1733
1734 void
1735 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1736                                 tree ni_name, tree ratio_mult_vf_name,
1737                                 unsigned int th, bool check_profitability)
1738 {
1739   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1740   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1741   struct loop *new_loop;
1742   edge update_e;
1743   basic_block preheader;
1744   int loop_num;
1745   int max_iter;
1746   tree cond_expr = NULL_TREE;
1747   gimple_seq cond_expr_stmt_list = NULL;
1748
1749   if (dump_enabled_p ())
1750     dump_printf_loc (MSG_NOTE, vect_location,
1751                      "=== vect_do_peeling_for_loop_bound ===\n");
1752
1753   initialize_original_copy_tables ();
1754
1755   loop_num  = loop->num;
1756
1757   new_loop
1758     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1759                                      &ratio_mult_vf_name, ni_name, false,
1760                                      th, check_profitability,
1761                                      cond_expr, cond_expr_stmt_list,
1762                                      0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1763   gcc_assert (new_loop);
1764   gcc_assert (loop_num == loop->num);
1765 #ifdef ENABLE_CHECKING
1766   slpeel_verify_cfg_after_peeling (loop, new_loop);
1767 #endif
1768
1769   /* A guard that controls whether the new_loop is to be executed or skipped
1770      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1771      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1772      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1773      is on the path where the LOOP IVs are used and need to be updated.  */
1774
1775   preheader = loop_preheader_edge (new_loop)->src;
1776   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1777     update_e = EDGE_PRED (preheader, 0);
1778   else
1779     update_e = EDGE_PRED (preheader, 1);
1780
1781   /* Update IVs of original loop as if they were advanced
1782      by ratio_mult_vf_name steps.  */
1783   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1784
1785   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1786      and this means N-2 loopback edge executions.
1787
1788      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1789      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1790   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1791               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1792               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1793   if (check_profitability)
1794     max_iter = MAX (max_iter, (int) th - 1);
1795   record_niter_bound (new_loop, max_iter, false, true);
1796   dump_printf (MSG_NOTE,
1797                "Setting upper bound of nb iterations for epilogue "
1798                "loop to %d\n", max_iter);
1799
1800   /* After peeling we have to reset scalar evolution analyzer.  */
1801   scev_reset ();
1802
1803   free_original_copy_tables ();
1804 }
1805
1806
1807 /* Function vect_gen_niters_for_prolog_loop
1808
1809    Set the number of iterations for the loop represented by LOOP_VINFO
1810    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1811    and the misalignment of DR - the data reference recorded in
1812    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1813    this loop, the data reference DR will refer to an aligned location.
1814
1815    The following computation is generated:
1816
1817    If the misalignment of DR is known at compile time:
1818      addr_mis = int mis = DR_MISALIGNMENT (dr);
1819    Else, compute address misalignment in bytes:
1820      addr_mis = addr & (vectype_align - 1)
1821
1822    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1823
1824    (elem_size = element type size; an element is the scalar element whose type
1825    is the inner type of the vectype)
1826
1827    When the step of the data-ref in the loop is not 1 (as in interleaved data
1828    and SLP), the number of iterations of the prolog must be divided by the step
1829    (which is equal to the size of interleaved group).
1830
1831    The above formulas assume that VF == number of elements in the vector. This
1832    may not hold when there are multiple-types in the loop.
1833    In this case, for some data-references in the loop the VF does not represent
1834    the number of elements that fit in the vector.  Therefore, instead of VF we
1835    use TYPE_VECTOR_SUBPARTS.  */
1836
1837 static tree
1838 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1839 {
1840   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1841   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1842   tree var;
1843   gimple_seq stmts;
1844   tree iters, iters_name;
1845   edge pe;
1846   basic_block new_bb;
1847   gimple dr_stmt = DR_STMT (dr);
1848   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1849   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1851   tree niters_type = TREE_TYPE (loop_niters);
1852   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1853
1854   pe = loop_preheader_edge (loop);
1855
1856   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1857     {
1858       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1859
1860       if (dump_enabled_p ())
1861         dump_printf_loc (MSG_NOTE, vect_location,
1862                          "known peeling = %d.\n", npeel);
1863
1864       iters = build_int_cst (niters_type, npeel);
1865       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1866     }
1867   else
1868     {
1869       gimple_seq new_stmts = NULL;
1870       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1871       tree offset = negative
1872           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1873       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1874                                                 &new_stmts, offset, loop);
1875       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1876       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1877       HOST_WIDE_INT elem_size =
1878                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1879       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1880       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1881       tree nelements_tree = build_int_cst (type, nelements);
1882       tree byte_misalign;
1883       tree elem_misalign;
1884
1885       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1886       gcc_assert (!new_bb);
1887
1888       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1889       byte_misalign =
1890         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1891                      vectype_align_minus_1);
1892
1893       /* Create:  elem_misalign = byte_misalign / element_size  */
1894       elem_misalign =
1895         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1896
1897       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1898       if (negative)
1899         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1900       else
1901         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1902       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1903       iters = fold_convert (niters_type, iters);
1904       *bound = nelements;
1905     }
1906
1907   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1908   /* If the loop bound is known at compile time we already verified that it is
1909      greater than vf; since the misalignment ('iters') is at most vf, there's
1910      no need to generate the MIN_EXPR in this case.  */
1911   if (TREE_CODE (loop_niters) != INTEGER_CST)
1912     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1913
1914   if (dump_enabled_p ())
1915     {
1916       dump_printf_loc (MSG_NOTE, vect_location,
1917                        "niters for prolog loop: ");
1918       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1919       dump_printf (MSG_NOTE, "\n");
1920     }
1921
1922   var = create_tmp_var (niters_type, "prolog_loop_niters");
1923   stmts = NULL;
1924   iters_name = force_gimple_operand (iters, &stmts, false, var);
1925
1926   /* Insert stmt on loop preheader edge.  */
1927   if (stmts)
1928     {
1929       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1930       gcc_assert (!new_bb);
1931     }
1932
1933   return iters_name;
1934 }
1935
1936
1937 /* Function vect_update_init_of_dr
1938
1939    NITERS iterations were peeled from LOOP.  DR represents a data reference
1940    in LOOP.  This function updates the information recorded in DR to
1941    account for the fact that the first NITERS iterations had already been
1942    executed.  Specifically, it updates the OFFSET field of DR.  */
1943
1944 static void
1945 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1946 {
1947   tree offset = DR_OFFSET (dr);
1948
1949   niters = fold_build2 (MULT_EXPR, sizetype,
1950                         fold_convert (sizetype, niters),
1951                         fold_convert (sizetype, DR_STEP (dr)));
1952   offset = fold_build2 (PLUS_EXPR, sizetype,
1953                         fold_convert (sizetype, offset), niters);
1954   DR_OFFSET (dr) = offset;
1955 }
1956
1957
1958 /* Function vect_update_inits_of_drs
1959
1960    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1961    This function updates the information recorded for the data references in
1962    the loop to account for the fact that the first NITERS iterations had
1963    already been executed.  Specifically, it updates the initial_condition of
1964    the access_function of all the data_references in the loop.  */
1965
1966 static void
1967 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1968 {
1969   unsigned int i;
1970   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1971   struct data_reference *dr;
1972
1973  if (dump_enabled_p ())
1974     dump_printf_loc (MSG_NOTE, vect_location,
1975                      "=== vect_update_inits_of_dr ===\n");
1976
1977   FOR_EACH_VEC_ELT (datarefs, i, dr)
1978     vect_update_init_of_dr (dr, niters);
1979 }
1980
1981
1982 /* Function vect_do_peeling_for_alignment
1983
1984    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1985    'niters' is set to the misalignment of one of the data references in the
1986    loop, thereby forcing it to refer to an aligned location at the beginning
1987    of the execution of this loop.  The data reference for which we are
1988    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1989
1990 void
1991 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
1992                                unsigned int th, bool check_profitability)
1993 {
1994   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1995   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1996   tree niters_of_prolog_loop;
1997   tree wide_prolog_niters;
1998   struct loop *new_loop;
1999   int max_iter;
2000   int bound = 0;
2001
2002   if (dump_enabled_p ())
2003     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2004                      "loop peeled for vectorization to enhance"
2005                      " alignment\n");
2006
2007   initialize_original_copy_tables ();
2008
2009   gimple_seq stmts = NULL;
2010   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2011   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2012                                                            ni_name,
2013                                                            &bound);
2014
2015   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2016   new_loop =
2017     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2018                                    loop_preheader_edge (loop),
2019                                    &niters_of_prolog_loop, ni_name, true,
2020                                    th, check_profitability, NULL_TREE, NULL,
2021                                    bound, 0);
2022
2023   gcc_assert (new_loop);
2024 #ifdef ENABLE_CHECKING
2025   slpeel_verify_cfg_after_peeling (new_loop, loop);
2026 #endif
2027   /* For vectorization factor N, we need to copy at most N-1 values
2028      for alignment and this means N-2 loopback edge executions.  */
2029   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2030   if (check_profitability)
2031     max_iter = MAX (max_iter, (int) th - 1);
2032   record_niter_bound (new_loop, max_iter, false, true);
2033   dump_printf (MSG_NOTE,
2034                "Setting upper bound of nb iterations for prologue "
2035                "loop to %d\n", max_iter);
2036
2037   /* Update number of times loop executes.  */
2038   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2039                 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2040   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2041                 TREE_TYPE (ni_name),
2042                 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2043
2044   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2045     wide_prolog_niters = niters_of_prolog_loop;
2046   else
2047     {
2048       gimple_seq seq = NULL;
2049       edge pe = loop_preheader_edge (loop);
2050       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2051       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2052       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2053                                                  var);
2054       if (seq)
2055         {
2056           /* Insert stmt on loop preheader edge.  */
2057           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2058           gcc_assert (!new_bb);
2059         }
2060     }
2061
2062   /* Update the init conditions of the access functions of all data refs.  */
2063   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2064
2065   /* After peeling we have to reset scalar evolution analyzer.  */
2066   scev_reset ();
2067
2068   free_original_copy_tables ();
2069 }
2070
2071
2072 /* Function vect_create_cond_for_align_checks.
2073
2074    Create a conditional expression that represents the alignment checks for
2075    all of data references (array element references) whose alignment must be
2076    checked at runtime.
2077
2078    Input:
2079    COND_EXPR  - input conditional expression.  New conditions will be chained
2080                 with logical AND operation.
2081    LOOP_VINFO - two fields of the loop information are used.
2082                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2083                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2084
2085    Output:
2086    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2087                          expression.
2088    The returned value is the conditional expression to be used in the if
2089    statement that controls which version of the loop gets executed at runtime.
2090
2091    The algorithm makes two assumptions:
2092      1) The number of bytes "n" in a vector is a power of 2.
2093      2) An address "a" is aligned if a%n is zero and that this
2094         test can be done as a&(n-1) == 0.  For example, for 16
2095         byte vectors the test is a&0xf == 0.  */
2096
2097 static void
2098 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2099                                    tree *cond_expr,
2100                                    gimple_seq *cond_expr_stmt_list)
2101 {
2102   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2103   vec<gimple> may_misalign_stmts
2104     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2105   gimple ref_stmt;
2106   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2107   tree mask_cst;
2108   unsigned int i;
2109   tree int_ptrsize_type;
2110   char tmp_name[20];
2111   tree or_tmp_name = NULL_TREE;
2112   tree and_tmp_name;
2113   gimple and_stmt;
2114   tree ptrsize_zero;
2115   tree part_cond_expr;
2116
2117   /* Check that mask is one less than a power of 2, i.e., mask is
2118      all zeros followed by all ones.  */
2119   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2120
2121   int_ptrsize_type = signed_type_for (ptr_type_node);
2122
2123   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2124      of the first vector of the i'th data reference. */
2125
2126   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2127     {
2128       gimple_seq new_stmt_list = NULL;
2129       tree addr_base;
2130       tree addr_tmp_name;
2131       tree new_or_tmp_name;
2132       gimple addr_stmt, or_stmt;
2133       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2134       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2135       bool negative = tree_int_cst_compare
2136         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2137       tree offset = negative
2138         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2139
2140       /* create: addr_tmp = (int)(address_of_first_vector) */
2141       addr_base =
2142         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2143                                               offset, loop);
2144       if (new_stmt_list != NULL)
2145         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2146
2147       sprintf (tmp_name, "addr2int%d", i);
2148       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2149       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2150                                                 addr_base, NULL_TREE);
2151       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2152
2153       /* The addresses are OR together.  */
2154
2155       if (or_tmp_name != NULL_TREE)
2156         {
2157           /* create: or_tmp = or_tmp | addr_tmp */
2158           sprintf (tmp_name, "orptrs%d", i);
2159           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2160           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2161                                                   new_or_tmp_name,
2162                                                   or_tmp_name, addr_tmp_name);
2163           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2164           or_tmp_name = new_or_tmp_name;
2165         }
2166       else
2167         or_tmp_name = addr_tmp_name;
2168
2169     } /* end for i */
2170
2171   mask_cst = build_int_cst (int_ptrsize_type, mask);
2172
2173   /* create: and_tmp = or_tmp & mask  */
2174   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2175
2176   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2177                                            or_tmp_name, mask_cst);
2178   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2179
2180   /* Make and_tmp the left operand of the conditional test against zero.
2181      if and_tmp has a nonzero bit then some address is unaligned.  */
2182   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2183   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2184                                 and_tmp_name, ptrsize_zero);
2185   if (*cond_expr)
2186     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2187                               *cond_expr, part_cond_expr);
2188   else
2189     *cond_expr = part_cond_expr;
2190 }
2191
2192 /* Function vect_create_cond_for_alias_checks.
2193
2194    Create a conditional expression that represents the run-time checks for
2195    overlapping of address ranges represented by a list of data references
2196    relations passed as input.
2197
2198    Input:
2199    COND_EXPR  - input conditional expression.  New conditions will be chained
2200                 with logical AND operation.  If it is NULL, then the function
2201                 is used to return the number of alias checks.
2202    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2203                 to be checked.
2204
2205    Output:
2206    COND_EXPR - conditional expression.
2207
2208    The returned COND_EXPR is the conditional expression to be used in the if
2209    statement that controls which version of the loop gets executed at runtime.
2210 */
2211
2212 void
2213 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2214 {
2215   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2216     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2217   tree part_cond_expr;
2218
2219   /* Create expression
2220      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2221      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2222      &&
2223      ...
2224      &&
2225      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2226      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2227
2228   if (comp_alias_ddrs.is_empty ())
2229     return;
2230
2231   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2232     {
2233       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2234       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2235       tree segment_length_a = dr_a.seg_len;
2236       tree segment_length_b = dr_b.seg_len;
2237
2238       tree addr_base_a
2239         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2240       tree addr_base_b
2241         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2242
2243       if (dump_enabled_p ())
2244         {
2245           dump_printf_loc (MSG_NOTE, vect_location,
2246                            "create runtime check for data references ");
2247           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2248           dump_printf (MSG_NOTE, " and ");
2249           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2250           dump_printf (MSG_NOTE, "\n");
2251         }
2252
2253       tree seg_a_min = addr_base_a;
2254       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2255       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2256          bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2257          [a, a+12) */
2258       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2259         {
2260           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2261           seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2262           seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2263         }
2264
2265       tree seg_b_min = addr_base_b;
2266       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2267       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2268         {
2269           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2270           seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2271           seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2272         }
2273
2274       part_cond_expr =
2275         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2276           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2277           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2278
2279       if (*cond_expr)
2280         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2281                                   *cond_expr, part_cond_expr);
2282       else
2283         *cond_expr = part_cond_expr;
2284     }
2285
2286   if (dump_enabled_p ())
2287     dump_printf_loc (MSG_NOTE, vect_location,
2288                      "created %u versioning for alias checks.\n",
2289                      comp_alias_ddrs.length ());
2290
2291   comp_alias_ddrs.release ();
2292 }
2293
2294
2295 /* Function vect_loop_versioning.
2296
2297    If the loop has data references that may or may not be aligned or/and
2298    has data reference relations whose independence was not proven then
2299    two versions of the loop need to be generated, one which is vectorized
2300    and one which isn't.  A test is then generated to control which of the
2301    loops is executed.  The test checks for the alignment of all of the
2302    data references that may or may not be aligned.  An additional
2303    sequence of runtime tests is generated for each pairs of DDRs whose
2304    independence was not proven.  The vectorized version of loop is
2305    executed only if both alias and alignment tests are passed.
2306
2307    The test generated to check which version of loop is executed
2308    is modified to also check for profitability as indicated by the
2309    cost model initially.
2310
2311    The versioning precondition(s) are placed in *COND_EXPR and
2312    *COND_EXPR_STMT_LIST.  */
2313
2314 void
2315 vect_loop_versioning (loop_vec_info loop_vinfo,
2316                       unsigned int th, bool check_profitability)
2317 {
2318   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2319   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2320   basic_block condition_bb;
2321   gphi_iterator gsi;
2322   gimple_stmt_iterator cond_exp_gsi;
2323   basic_block merge_bb;
2324   basic_block new_exit_bb;
2325   edge new_exit_e, e;
2326   gphi *orig_phi, *new_phi;
2327   tree cond_expr = NULL_TREE;
2328   gimple_seq cond_expr_stmt_list = NULL;
2329   tree arg;
2330   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2331   gimple_seq gimplify_stmt_list = NULL;
2332   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2333   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2334   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2335
2336   if (check_profitability)
2337     {
2338       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2339                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2340       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2341                                           is_gimple_condexpr, NULL_TREE);
2342     }
2343
2344   if (version_align)
2345     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2346                                        &cond_expr_stmt_list);
2347
2348   if (version_alias)
2349     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2350
2351   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2352                                       is_gimple_condexpr, NULL_TREE);
2353   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2354
2355   initialize_original_copy_tables ();
2356   if (scalar_loop)
2357     {
2358       edge scalar_e;
2359       basic_block preheader, scalar_preheader;
2360
2361       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2362          scale LOOP's frequencies instead.  */
2363       loop_version (scalar_loop, cond_expr, &condition_bb,
2364                     prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2365       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2366       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2367          while we need to move it above LOOP's preheader.  */
2368       e = loop_preheader_edge (loop);
2369       scalar_e = loop_preheader_edge (scalar_loop);
2370       gcc_assert (empty_block_p (e->src)
2371                   && single_pred_p (e->src));
2372       gcc_assert (empty_block_p (scalar_e->src)
2373                   && single_pred_p (scalar_e->src));
2374       gcc_assert (single_pred_p (condition_bb));
2375       preheader = e->src;
2376       scalar_preheader = scalar_e->src;
2377       scalar_e = find_edge (condition_bb, scalar_preheader);
2378       e = single_pred_edge (preheader);
2379       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2380                                       scalar_preheader);
2381       redirect_edge_and_branch_force (scalar_e, preheader);
2382       redirect_edge_and_branch_force (e, condition_bb);
2383       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2384                                single_pred (condition_bb));
2385       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2386                                single_pred (scalar_preheader));
2387       set_immediate_dominator (CDI_DOMINATORS, preheader,
2388                                condition_bb);
2389     }
2390   else
2391     loop_version (loop, cond_expr, &condition_bb,
2392                   prob, prob, REG_BR_PROB_BASE - prob, true);
2393
2394   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2395       && dump_enabled_p ())
2396     {
2397       if (version_alias)
2398         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2399                          "loop versioned for vectorization because of "
2400                          "possible aliasing\n");
2401       if (version_align)
2402         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2403                          "loop versioned for vectorization to enhance "
2404                          "alignment\n");
2405
2406     }
2407   free_original_copy_tables ();
2408
2409   /* Loop versioning violates an assumption we try to maintain during
2410      vectorization - that the loop exit block has a single predecessor.
2411      After versioning, the exit block of both loop versions is the same
2412      basic block (i.e. it has two predecessors). Just in order to simplify
2413      following transformations in the vectorizer, we fix this situation
2414      here by adding a new (empty) block on the exit-edge of the loop,
2415      with the proper loop-exit phis to maintain loop-closed-form.
2416      If loop versioning wasn't done from loop, but scalar_loop instead,
2417      merge_bb will have already just a single successor.  */
2418
2419   merge_bb = single_exit (loop)->dest;
2420   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2421     {
2422       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2423       new_exit_bb = split_edge (single_exit (loop));
2424       new_exit_e = single_exit (loop);
2425       e = EDGE_SUCC (new_exit_bb, 0);
2426
2427       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2428         {
2429           tree new_res;
2430           orig_phi = gsi.phi ();
2431           new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2432           new_phi = create_phi_node (new_res, new_exit_bb);
2433           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2434           add_phi_arg (new_phi, arg, new_exit_e,
2435                        gimple_phi_arg_location_from_edge (orig_phi, e));
2436           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2437         }
2438     }
2439
2440   /* End loop-exit-fixes after versioning.  */
2441
2442   if (cond_expr_stmt_list)
2443     {
2444       cond_exp_gsi = gsi_last_bb (condition_bb);
2445       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2446                              GSI_SAME_STMT);
2447     }
2448   update_ssa (TODO_update_ssa);
2449 }