gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "cfghooks.h"
  29 #include "tree-pass.h"
  30 #include "ssa.h"
  31 #include "fold-const.h"
  32 #include "cfganal.h"
  33 #include "gimplify.h"
  34 #include "gimple-iterator.h"
  35 #include "gimplify-me.h"
  36 #include "tree-cfg.h"
  37 #include "tree-ssa-loop-manip.h"
  38 #include "tree-into-ssa.h"
  39 #include "tree-ssa.h"
  40 #include "cfgloop.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "tree-vectorizer.h"
  43 #include "tree-ssa-loop-ivopts.h"
  44
  45 /*************************************************************************
  46   Simple Loop Peeling Utilities
  47
  48   Utilities to support loop peeling for vectorization purposes.
  49  *************************************************************************/
  50
  51
  52 /* Renames the use *OP_P.  */
  53
  54 static void
  55 rename_use_op (use_operand_p op_p)
  56 {
  57   tree new_name;
  58
  59   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  60     return;
  61
  62   new_name = get_current_def (USE_FROM_PTR (op_p));
  63
  64   /* Something defined outside of the loop.  */
  65   if (!new_name)
  66     return;
  67
  68   /* An ordinary ssa name defined in the loop.  */
  69
  70   SET_USE (op_p, new_name);
  71 }
  72
  73
  74 /* Renames the variables in basic block BB.  Allow renaming  of PHI argumnets
  75    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
  76    true.  */
  77
  78 static void
  79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
  80 {
  81   gimple *stmt;
  82   use_operand_p use_p;
  83   ssa_op_iter iter;
  84   edge e;
  85   edge_iterator ei;
  86   struct loop *loop = bb->loop_father;
  87   struct loop *outer_loop = NULL;
  88
  89   if (rename_from_outer_loop)
  90     {
  91       gcc_assert (loop);
  92       outer_loop = loop_outer (loop);
  93     }
  94
  95   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
  96        gsi_next (&gsi))
  97     {
  98       stmt = gsi_stmt (gsi);
  99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 100         rename_use_op (use_p);
 101     }
 102
 103   FOR_EACH_EDGE (e, ei, bb->preds)
 104     {
 105       if (!flow_bb_inside_loop_p (loop, e->src)
 106           && (!rename_from_outer_loop || e->src != outer_loop->header))
 107         continue;
 108       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 109            gsi_next (&gsi))
 110         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 111     }
 112 }
 113
 114
 115 struct adjust_info
 116 {
 117   tree from, to;
 118   basic_block bb;
 119 };
 120
 121 /* A stack of values to be adjusted in debug stmts.  We have to
 122    process them LIFO, so that the closest substitution applies.  If we
 123    processed them FIFO, without the stack, we might substitute uses
 124    with a PHI DEF that would soon become non-dominant, and when we got
 125    to the suitable one, it wouldn't have anything to substitute any
 126    more.  */
 127 static vec<adjust_info, va_heap> adjust_vec;
 128
 129 /* Adjust any debug stmts that referenced AI->from values to use the
 130    loop-closed AI->to, if the references are dominated by AI->bb and
 131    not by the definition of AI->from.  */
 132
 133 static void
 134 adjust_debug_stmts_now (adjust_info *ai)
 135 {
 136   basic_block bbphi = ai->bb;
 137   tree orig_def = ai->from;
 138   tree new_def = ai->to;
 139   imm_use_iterator imm_iter;
 140   gimple *stmt;
 141   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 142
 143   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 144
 145   /* Adjust any debug stmts that held onto non-loop-closed
 146      references.  */
 147   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 148     {
 149       use_operand_p use_p;
 150       basic_block bbuse;
 151
 152       if (!is_gimple_debug (stmt))
 153         continue;
 154
 155       gcc_assert (gimple_debug_bind_p (stmt));
 156
 157       bbuse = gimple_bb (stmt);
 158
 159       if ((bbuse == bbphi
 160            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 161           && !(bbuse == bbdef
 162                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 163         {
 164           if (new_def)
 165             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 166               SET_USE (use_p, new_def);
 167           else
 168             {
 169               gimple_debug_bind_reset_value (stmt);
 170               update_stmt (stmt);
 171             }
 172         }
 173     }
 174 }
 175
 176 /* Adjust debug stmts as scheduled before.  */
 177
 178 static void
 179 adjust_vec_debug_stmts (void)
 180 {
 181   if (!MAY_HAVE_DEBUG_STMTS)
 182     return;
 183
 184   gcc_assert (adjust_vec.exists ());
 185
 186   while (!adjust_vec.is_empty ())
 187     {
 188       adjust_debug_stmts_now (&adjust_vec.last ());
 189       adjust_vec.pop ();
 190     }
 191
 192   adjust_vec.release ();
 193 }
 194
 195 /* Adjust any debug stmts that referenced FROM values to use the
 196    loop-closed TO, if the references are dominated by BB and not by
 197    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 198    will be postponed until adjust_vec_debug_stmts is called.  */
 199
 200 static void
 201 adjust_debug_stmts (tree from, tree to, basic_block bb)
 202 {
 203   adjust_info ai;
 204
 205   if (MAY_HAVE_DEBUG_STMTS
 206       && TREE_CODE (from) == SSA_NAME
 207       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 208       && ! virtual_operand_p (from))
 209     {
 210       ai.from = from;
 211       ai.to = to;
 212       ai.bb = bb;
 213
 214       if (adjust_vec.exists ())
 215         adjust_vec.safe_push (ai);
 216       else
 217         adjust_debug_stmts_now (&ai);
 218     }
 219 }
 220
 221 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 222    to adjust any debug stmts that referenced the old phi arg,
 223    presumably non-loop-closed references left over from other
 224    transformations.  */
 225
 226 static void
 227 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
 228 {
 229   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 230
 231   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 232
 233   if (MAY_HAVE_DEBUG_STMTS)
 234     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 235                         gimple_bb (update_phi));
 236 }
 237
 238
 239 /* Update PHI nodes for a guard of the LOOP.
 240
 241    Input:
 242    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 243         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 244         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 245         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 246         We denote this bb NEW_MERGE_BB because before the guard code was added
 247         it had a single predecessor (the LOOP header), and now it became a merge
 248         point of two paths - the path that ends with the LOOP exit-edge, and
 249         the path that ends with GUARD_EDGE.
 250    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 251         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 252
 253    ===> The CFG before the guard-code was added:
 254         LOOP_header_bb:
 255           loop_body
 256           if (exit_loop) goto update_bb
 257           else           goto LOOP_header_bb
 258         update_bb:
 259
 260    ==> The CFG after the guard-code was added:
 261         guard_bb:
 262           if (LOOP_guard_condition) goto new_merge_bb
 263           else                      goto LOOP_header_bb
 264         LOOP_header_bb:
 265           loop_body
 266           if (exit_loop_condition) goto new_merge_bb
 267           else                     goto LOOP_header_bb
 268         new_merge_bb:
 269           goto update_bb
 270         update_bb:
 271
 272    ==> The CFG after this function:
 273         guard_bb:
 274           if (LOOP_guard_condition) goto new_merge_bb
 275           else                      goto LOOP_header_bb
 276         LOOP_header_bb:
 277           loop_body
 278           if (exit_loop_condition) goto new_exit_bb
 279           else                     goto LOOP_header_bb
 280         new_exit_bb:
 281         new_merge_bb:
 282           goto update_bb
 283         update_bb:
 284
 285    This function:
 286    1. creates and updates the relevant phi nodes to account for the new
 287       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 288       1.1. Create phi nodes at NEW_MERGE_BB.
 289       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 290            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 291    2. preserves loop-closed-ssa-form by creating the required phi nodes
 292       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 293
 294    There are two flavors to this function:
 295
 296    slpeel_update_phi_nodes_for_guard1:
 297      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 298      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 299      for variables that have phis in the loop header.
 300
 301    slpeel_update_phi_nodes_for_guard2:
 302      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 303      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 304      for variables that have phis in the loop exit.
 305
 306    I.E., the overall structure is:
 307
 308         loop1_preheader_bb:
 309                 guard1 (goto loop1/merge1_bb)
 310         loop1
 311         loop1_exit_bb:
 312                 guard2 (goto merge1_bb/merge2_bb)
 313         merge1_bb
 314         loop2
 315         loop2_exit_bb
 316         merge2_bb
 317         next_bb
 318
 319    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 320    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 321    that have phis in loop1->header).
 322
 323    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 324    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 325    that have phis in next_bb). It also adds some of these phis to
 326    loop1_exit_bb.
 327
 328    slpeel_update_phi_nodes_for_guard1 is always called before
 329    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 330    to create correct data-flow and loop-closed-ssa-form.
 331
 332    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 333    that change between iterations of a loop (and therefore have a phi-node
 334    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 335    phis for variables that are used out of the loop (and therefore have
 336    loop-closed exit phis). Some variables may be both updated between
 337    iterations and used after the loop. This is why in loop1_exit_bb we
 338    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 339    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 340
 341    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 342      an original loop. i.e., we have:
 343
 344            orig_loop
 345            guard_bb (goto LOOP/new_merge)
 346            new_loop <-- LOOP
 347            new_exit
 348            new_merge
 349            next_bb
 350
 351      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 352      have:
 353
 354            new_loop
 355            guard_bb (goto LOOP/new_merge)
 356            orig_loop <-- LOOP
 357            new_exit
 358            new_merge
 359            next_bb
 360
 361      The SSA names defined in the original loop have a current
 362      reaching definition that records the corresponding new ssa-name
 363      used in the new duplicated loop copy.
 364   */
 365
 366 /* Function slpeel_update_phi_nodes_for_guard1
 367
 368    Input:
 369    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 370    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 371             information.
 372
 373    In the context of the overall structure, we have:
 374
 375         loop1_preheader_bb:
 376                 guard1 (goto loop1/merge1_bb)
 377 LOOP->  loop1
 378         loop1_exit_bb:
 379                 guard2 (goto merge1_bb/merge2_bb)
 380         merge1_bb
 381         loop2
 382         loop2_exit_bb
 383         merge2_bb
 384         next_bb
 385
 386    For each name updated between loop iterations (i.e - for each name that has
 387    an entry (loop-header) phi in LOOP) we create a new phi in:
 388    1. merge1_bb (to account for the edge from guard1)
 389    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 390 */
 391
 392 static void
 393 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 394                                     bool is_new_loop, basic_block *new_exit_bb)
 395 {
 396   gphi *orig_phi, *new_phi;
 397   gphi *update_phi, *update_phi2;
 398   tree guard_arg, loop_arg;
 399   basic_block new_merge_bb = guard_edge->dest;
 400   edge e = EDGE_SUCC (new_merge_bb, 0);
 401   basic_block update_bb = e->dest;
 402   basic_block orig_bb = loop->header;
 403   edge new_exit_e;
 404   tree current_new_name;
 405   gphi_iterator gsi_orig, gsi_update;
 406
 407   /* Create new bb between loop and new_merge_bb.  */
 408   *new_exit_bb = split_edge (single_exit (loop));
 409
 410   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 411
 412   for (gsi_orig = gsi_start_phis (orig_bb),
 413        gsi_update = gsi_start_phis (update_bb);
 414        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 415        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 416     {
 417       source_location loop_locus, guard_locus;
 418       tree new_res;
 419       orig_phi = gsi_orig.phi ();
 420       update_phi = gsi_update.phi ();
 421
 422       /** 1. Handle new-merge-point phis  **/
 423
 424       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 425       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 426       new_phi = create_phi_node (new_res, new_merge_bb);
 427
 428       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 429             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 430       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 431       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 432                                                       EDGE_SUCC (loop->latch,
 433                                                                  0));
 434       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 435       guard_locus
 436         = gimple_phi_arg_location_from_edge (orig_phi,
 437                                              loop_preheader_edge (loop));
 438
 439       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 440       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 441
 442       /* 1.3. Update phi in successor block.  */
 443       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 444                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 445       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 446       update_phi2 = new_phi;
 447
 448
 449       /** 2. Handle loop-closed-ssa-form phis  **/
 450
 451       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 452         continue;
 453
 454       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 455       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 456       new_phi = create_phi_node (new_res, *new_exit_bb);
 457
 458       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 459       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 460
 461       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 462       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 463       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 464                                   PHI_RESULT (new_phi));
 465
 466       /* 2.4. Record the newly created name with set_current_def.
 467          We want to find a name such that
 468                 name = get_current_def (orig_loop_name)
 469          and to set its current definition as follows:
 470                 set_current_def (name, new_phi_name)
 471
 472          If LOOP is a new loop then loop_arg is already the name we're
 473          looking for. If LOOP is the original loop, then loop_arg is
 474          the orig_loop_name and the relevant name is recorded in its
 475          current reaching definition.  */
 476       if (is_new_loop)
 477         current_new_name = loop_arg;
 478       else
 479         {
 480           current_new_name = get_current_def (loop_arg);
 481           /* current_def is not available only if the variable does not
 482              change inside the loop, in which case we also don't care
 483              about recording a current_def for it because we won't be
 484              trying to create loop-exit-phis for it.  */
 485           if (!current_new_name)
 486             continue;
 487         }
 488       tree new_name = get_current_def (current_new_name);
 489       /* Because of peeled_chrec optimization it is possible that we have
 490          set this earlier.  Verify the PHI has the same value.  */
 491       if (new_name)
 492         {
 493           gimple *phi = SSA_NAME_DEF_STMT (new_name);
 494           gcc_assert (gimple_code (phi) == GIMPLE_PHI
 495                       && gimple_bb (phi) == *new_exit_bb
 496                       && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
 497                           == loop_arg));
 498           continue;
 499         }
 500
 501       set_current_def (current_new_name, PHI_RESULT (new_phi));
 502     }
 503 }
 504
 505
 506 /* Function slpeel_update_phi_nodes_for_guard2
 507
 508    Input:
 509    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 510
 511    In the context of the overall structure, we have:
 512
 513         loop1_preheader_bb:
 514                 guard1 (goto loop1/merge1_bb)
 515         loop1
 516         loop1_exit_bb:
 517                 guard2 (goto merge1_bb/merge2_bb)
 518         merge1_bb
 519 LOOP->  loop2
 520         loop2_exit_bb
 521         merge2_bb
 522         next_bb
 523
 524    For each name used out side the loop (i.e - for each name that has an exit
 525    phi in next_bb) we create a new phi in:
 526    1. merge2_bb (to account for the edge from guard_bb)
 527    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 528    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 529       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 530 */
 531
 532 static void
 533 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 534                                     bool is_new_loop, basic_block *new_exit_bb)
 535 {
 536   gphi *orig_phi, *new_phi;
 537   gphi *update_phi, *update_phi2;
 538   tree guard_arg, loop_arg;
 539   basic_block new_merge_bb = guard_edge->dest;
 540   edge e = EDGE_SUCC (new_merge_bb, 0);
 541   basic_block update_bb = e->dest;
 542   edge new_exit_e;
 543   tree orig_def, orig_def_new_name;
 544   tree new_name, new_name2;
 545   tree arg;
 546   gphi_iterator gsi;
 547
 548   /* Create new bb between loop and new_merge_bb.  */
 549   *new_exit_bb = split_edge (single_exit (loop));
 550
 551   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 552
 553   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 554     {
 555       tree new_res;
 556       update_phi = gsi.phi ();
 557       orig_phi = update_phi;
 558       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 559       /* This loop-closed-phi actually doesn't represent a use
 560          out of the loop - the phi arg is a constant.  */
 561       if (TREE_CODE (orig_def) != SSA_NAME)
 562         continue;
 563       orig_def_new_name = get_current_def (orig_def);
 564       arg = NULL_TREE;
 565
 566       /** 1. Handle new-merge-point phis  **/
 567
 568       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 569       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 570       new_phi = create_phi_node (new_res, new_merge_bb);
 571
 572       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 573             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 574       new_name = orig_def;
 575       new_name2 = NULL_TREE;
 576       if (orig_def_new_name)
 577         {
 578           new_name = orig_def_new_name;
 579           /* Some variables have both loop-entry-phis and loop-exit-phis.
 580              Such variables were given yet newer names by phis placed in
 581              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 582              new_name2 = get_current_def (get_current_def (orig_name)).  */
 583           new_name2 = get_current_def (new_name);
 584         }
 585
 586       if (is_new_loop)
 587         {
 588           guard_arg = orig_def;
 589           loop_arg = new_name;
 590         }
 591       else
 592         {
 593           guard_arg = new_name;
 594           loop_arg = orig_def;
 595         }
 596       if (new_name2)
 597         guard_arg = new_name2;
 598
 599       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 600       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 601
 602       /* 1.3. Update phi in successor block.  */
 603       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 604       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 605       update_phi2 = new_phi;
 606
 607
 608       /** 2. Handle loop-closed-ssa-form phis  **/
 609
 610       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 611         continue;
 612
 613       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 614       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 615       new_phi = create_phi_node (new_res, *new_exit_bb);
 616
 617       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 618       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 619
 620       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 621       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 622       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 623                                   PHI_RESULT (new_phi));
 624
 625
 626       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 627
 628       /* 3.1. Find the relevant names that need an exit-phi in
 629          GUARD_BB, i.e. names for which
 630          slpeel_update_phi_nodes_for_guard1 had not already created a
 631          phi node. This is the case for names that are used outside
 632          the loop (and therefore need an exit phi) but are not updated
 633          across loop iterations (and therefore don't have a
 634          loop-header-phi).
 635
 636          slpeel_update_phi_nodes_for_guard1 is responsible for
 637          creating loop-exit phis in GUARD_BB for names that have a
 638          loop-header-phi.  When such a phi is created we also record
 639          the new name in its current definition.  If this new name
 640          exists, then guard_arg was set to this new name (see 1.2
 641          above).  Therefore, if guard_arg is not this new name, this
 642          is an indication that an exit-phi in GUARD_BB was not yet
 643          created, so we take care of it here.  */
 644       if (guard_arg == new_name2)
 645         continue;
 646       arg = guard_arg;
 647
 648       /* 3.2. Generate new phi node in GUARD_BB:  */
 649       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 650       new_phi = create_phi_node (new_res, guard_edge->src);
 651
 652       /* 3.3. GUARD_BB has one incoming edge:  */
 653       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 654       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 655                    UNKNOWN_LOCATION);
 656
 657       /* 3.4. Update phi in successor of GUARD_BB:  */
 658       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 659                                                                 == guard_arg);
 660       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 661                                   PHI_RESULT (new_phi));
 662     }
 663 }
 664
 665
 666 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 667    that starts at zero, increases by one and its limit is NITERS.
 668
 669    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 670
 671 void
 672 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 673 {
 674   tree indx_before_incr, indx_after_incr;
 675   gcond *cond_stmt;
 676   gcond *orig_cond;
 677   edge exit_edge = single_exit (loop);
 678   gimple_stmt_iterator loop_cond_gsi;
 679   gimple_stmt_iterator incr_gsi;
 680   bool insert_after;
 681   tree init = build_int_cst (TREE_TYPE (niters), 0);
 682   tree step = build_int_cst (TREE_TYPE (niters), 1);
 683   source_location loop_loc;
 684   enum tree_code code;
 685
 686   orig_cond = get_loop_exit_condition (loop);
 687   gcc_assert (orig_cond);
 688   loop_cond_gsi = gsi_for_stmt (orig_cond);
 689
 690   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 691   create_iv (init, step, NULL_TREE, loop,
 692              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 693
 694   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 695                                               true, NULL_TREE, true,
 696                                               GSI_SAME_STMT);
 697   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 698                                      true, GSI_SAME_STMT);
 699
 700   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 701   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 702                                  NULL_TREE);
 703
 704   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 705
 706   /* Remove old loop exit test:  */
 707   gsi_remove (&loop_cond_gsi, true);
 708   free_stmt_vec_info (orig_cond);
 709
 710   loop_loc = find_loop_location (loop);
 711   if (dump_enabled_p ())
 712     {
 713       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 714         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 715                      LOCATION_LINE (loop_loc));
 716       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 717     }
 718   loop->nb_iterations = niters;
 719 }
 720
 721 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 722    For all PHI arguments in FROM->dest and TO->dest from those
 723    edges ensure that TO->dest PHI arguments have current_def
 724    to that in from.  */
 725
 726 static void
 727 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 728 {
 729   gimple_stmt_iterator gsi_from, gsi_to;
 730
 731   for (gsi_from = gsi_start_phis (from->dest),
 732        gsi_to = gsi_start_phis (to->dest);
 733        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
 734     {
 735       gimple *from_phi = gsi_stmt (gsi_from);
 736       gimple *to_phi = gsi_stmt (gsi_to);
 737       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 738       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 739       if (virtual_operand_p (from_arg))
 740         {
 741           gsi_next (&gsi_from);
 742           continue;
 743         }
 744       if (virtual_operand_p (to_arg))
 745         {
 746           gsi_next (&gsi_to);
 747           continue;
 748         }
 749       if (TREE_CODE (from_arg) != SSA_NAME)
 750         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
 751       else
 752         {
 753           if (get_current_def (to_arg) == NULL_TREE)
 754             set_current_def (to_arg, get_current_def (from_arg));
 755         }
 756       gsi_next (&gsi_from);
 757       gsi_next (&gsi_to);
 758     }
 759
 760   gphi *from_phi = get_virtual_phi (from->dest);
 761   gphi *to_phi = get_virtual_phi (to->dest);
 762   if (from_phi)
 763     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
 764                      get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
 765 }
 766
 767
 768 /* Given LOOP this function generates a new copy of it and puts it
 769    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 770    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 771    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 772    entry or exit of LOOP.  */
 773
 774 struct loop *
 775 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 776                                         struct loop *scalar_loop, edge e)
 777 {
 778   struct loop *new_loop;
 779   basic_block *new_bbs, *bbs;
 780   bool at_exit;
 781   bool was_imm_dom;
 782   basic_block exit_dest;
 783   edge exit, new_exit;
 784   bool duplicate_outer_loop = false;
 785
 786   exit = single_exit (loop);
 787   at_exit = (e == exit);
 788   if (!at_exit && e != loop_preheader_edge (loop))
 789     return NULL;
 790
 791   if (scalar_loop == NULL)
 792     scalar_loop = loop;
 793
 794   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 795   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 796   /* Allow duplication of outer loops.  */
 797   if (scalar_loop->inner)
 798     duplicate_outer_loop = true;
 799   /* Check whether duplication is possible.  */
 800   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
 801     {
 802       free (bbs);
 803       return NULL;
 804     }
 805
 806   /* Generate new loop structure.  */
 807   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 808   duplicate_subloops (scalar_loop, new_loop);
 809
 810   exit_dest = exit->dest;
 811   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 812                                           exit_dest) == loop->header ?
 813                  true : false);
 814
 815   /* Also copy the pre-header, this avoids jumping through hoops to
 816      duplicate the loop entry PHI arguments.  Create an empty
 817      pre-header unconditionally for this.  */
 818   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 819   edge entry_e = single_pred_edge (preheader);
 820   bbs[scalar_loop->num_nodes] = preheader;
 821   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 822
 823   exit = single_exit (scalar_loop);
 824   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 825             &exit, 1, &new_exit, NULL,
 826             e->src, true);
 827   exit = single_exit (loop);
 828   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 829
 830   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 831
 832   if (scalar_loop != loop)
 833     {
 834       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 835          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 836          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 837          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 838          header) to have current_def set, so copy them over.  */
 839       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 840                                                 exit);
 841       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 842                                                            0),
 843                                                 EDGE_SUCC (loop->latch, 0));
 844     }
 845
 846   if (at_exit) /* Add the loop copy at exit.  */
 847     {
 848       if (scalar_loop != loop)
 849         {
 850           gphi_iterator gsi;
 851           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 852
 853           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 854                gsi_next (&gsi))
 855             {
 856               gphi *phi = gsi.phi ();
 857               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 858               location_t orig_locus
 859                 = gimple_phi_arg_location_from_edge (phi, e);
 860
 861               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 862             }
 863         }
 864       redirect_edge_and_branch_force (e, new_preheader);
 865       flush_pending_stmts (e);
 866       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 867       if (was_imm_dom || duplicate_outer_loop)
 868         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 869
 870       /* And remove the non-necessary forwarder again.  Keep the other
 871          one so we have a proper pre-header for the loop at the exit edge.  */
 872       redirect_edge_pred (single_succ_edge (preheader),
 873                           single_pred (preheader));
 874       delete_basic_block (preheader);
 875       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 876                                loop_preheader_edge (scalar_loop)->src);
 877     }
 878   else /* Add the copy at entry.  */
 879     {
 880       if (scalar_loop != loop)
 881         {
 882           /* Remove the non-necessary forwarder of scalar_loop again.  */
 883           redirect_edge_pred (single_succ_edge (preheader),
 884                               single_pred (preheader));
 885           delete_basic_block (preheader);
 886           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 887                                    loop_preheader_edge (scalar_loop)->src);
 888           preheader = split_edge (loop_preheader_edge (loop));
 889           entry_e = single_pred_edge (preheader);
 890         }
 891
 892       redirect_edge_and_branch_force (entry_e, new_preheader);
 893       flush_pending_stmts (entry_e);
 894       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 895
 896       redirect_edge_and_branch_force (new_exit, preheader);
 897       flush_pending_stmts (new_exit);
 898       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 899
 900       /* And remove the non-necessary forwarder again.  Keep the other
 901          one so we have a proper pre-header for the loop at the exit edge.  */
 902       redirect_edge_pred (single_succ_edge (new_preheader),
 903                           single_pred (new_preheader));
 904       delete_basic_block (new_preheader);
 905       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 906                                loop_preheader_edge (new_loop)->src);
 907     }
 908
 909   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 910     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
 911
 912   if (scalar_loop != loop)
 913     {
 914       /* Update new_loop->header PHIs, so that on the preheader
 915          edge they are the ones from loop rather than scalar_loop.  */
 916       gphi_iterator gsi_orig, gsi_new;
 917       edge orig_e = loop_preheader_edge (loop);
 918       edge new_e = loop_preheader_edge (new_loop);
 919
 920       for (gsi_orig = gsi_start_phis (loop->header),
 921            gsi_new = gsi_start_phis (new_loop->header);
 922            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 923            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 924         {
 925           gphi *orig_phi = gsi_orig.phi ();
 926           gphi *new_phi = gsi_new.phi ();
 927           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 928           location_t orig_locus
 929             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 930
 931           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 932         }
 933     }
 934
 935   free (new_bbs);
 936   free (bbs);
 937
 938   checking_verify_dominators (CDI_DOMINATORS);
 939
 940   return new_loop;
 941 }
 942
 943
 944 /* Given the condition statement COND, put it as the last statement
 945    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 946    Assumes that this is the single exit of the guarded loop.
 947    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 948
 949 static edge
 950 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 951                        gimple_seq cond_expr_stmt_list,
 952                        basic_block exit_bb, basic_block dom_bb,
 953                        int probability)
 954 {
 955   gimple_stmt_iterator gsi;
 956   edge new_e, enter_e;
 957   gcond *cond_stmt;
 958   gimple_seq gimplify_stmt_list = NULL;
 959
 960   enter_e = EDGE_SUCC (guard_bb, 0);
 961   enter_e->flags &= ~EDGE_FALLTHRU;
 962   enter_e->flags |= EDGE_FALSE_VALUE;
 963   gsi = gsi_last_bb (guard_bb);
 964
 965   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 966                                  NULL_TREE);
 967   if (gimplify_stmt_list)
 968     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 969   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 970   if (cond_expr_stmt_list)
 971     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 972
 973   gsi = gsi_last_bb (guard_bb);
 974   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 975
 976   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 977   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 978
 979   new_e->count = guard_bb->count;
 980   new_e->probability = probability;
 981   new_e->count = apply_probability (enter_e->count, probability);
 982   enter_e->count -= new_e->count;
 983   enter_e->probability = inverse_probability (probability);
 984   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 985   return new_e;
 986 }
 987
 988
 989 /* This function verifies that the following restrictions apply to LOOP:
 990    (1) it consists of exactly 2 basic blocks - header, and an empty latch
 991        for innermost loop and 5 basic blocks for outer-loop.
 992    (2) it is single entry, single exit
 993    (3) its exit condition is the last stmt in the header
 994    (4) E is the entry/exit edge of LOOP.
 995  */
 996
 997 bool
 998 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 999 {
1000   edge exit_e = single_exit (loop);
1001   edge entry_e = loop_preheader_edge (loop);
1002   gcond *orig_cond = get_loop_exit_condition (loop);
1003   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
1004   unsigned int num_bb = loop->inner? 5 : 2;
1005
1006       /* All loops have an outer scope; the only case loop->outer is NULL is for
1007          the function itself.  */
1008       if (!loop_outer (loop)
1009       || loop->num_nodes != num_bb
1010       || !empty_block_p (loop->latch)
1011       || !single_exit (loop)
1012       /* Verify that new loop exit condition can be trivially modified.  */
1013       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1014       || (e != exit_e && e != entry_e))
1015     return false;
1016
1017   return true;
1018 }
1019
1020 static void
1021 slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop,
1022                                           struct loop *second_loop)
1023 {
1024   if (!flag_checking)
1025     return;
1026
1027   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1028   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1029   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1030
1031   /* A guard that controls whether the second_loop is to be executed or skipped
1032      is placed in first_loop->exit.  first_loop->exit therefore has two
1033      successors - one is the preheader of second_loop, and the other is a bb
1034      after second_loop.
1035    */
1036   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1037
1038   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1039         of second_loop.  */
1040
1041   /* The preheader of new_loop is expected to have two predecessors:
1042      first_loop->exit and the block that precedes first_loop.  */
1043
1044   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1045               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1046                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1047                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1048                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1049
1050   /* Verify that the other successor of first_loop->exit is after the
1051      second_loop.  */
1052   /* TODO */
1053 }
1054
1055 /* If the run time cost model check determines that vectorization is
1056    not profitable and hence scalar loop should be generated then set
1057    FIRST_NITERS to prologue peeled iterations. This will allow all the
1058    iterations to be executed in the prologue peeled scalar loop.  */
1059
1060 static void
1061 set_prologue_iterations (basic_block bb_before_first_loop,
1062                          tree *first_niters,
1063                          struct loop *loop,
1064                          unsigned int th,
1065                          int probability)
1066 {
1067   edge e;
1068   basic_block cond_bb, then_bb;
1069   tree var, prologue_after_cost_adjust_name;
1070   gimple_stmt_iterator gsi;
1071   gphi *newphi;
1072   edge e_true, e_false, e_fallthru;
1073   gcond *cond_stmt;
1074   gimple_seq stmts = NULL;
1075   tree cost_pre_condition = NULL_TREE;
1076   tree scalar_loop_iters =
1077     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1078
1079   e = single_pred_edge (bb_before_first_loop);
1080   cond_bb = split_edge (e);
1081
1082   e = single_pred_edge (bb_before_first_loop);
1083   then_bb = split_edge (e);
1084   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1085
1086   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1087                                    EDGE_FALSE_VALUE);
1088   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1089
1090   e_true = EDGE_PRED (then_bb, 0);
1091   e_true->flags &= ~EDGE_FALLTHRU;
1092   e_true->flags |= EDGE_TRUE_VALUE;
1093
1094   e_true->probability = probability;
1095   e_false->probability = inverse_probability (probability);
1096   e_true->count = apply_probability (cond_bb->count, probability);
1097   e_false->count = cond_bb->count - e_true->count;
1098   then_bb->frequency = EDGE_FREQUENCY (e_true);
1099   then_bb->count = e_true->count;
1100
1101   e_fallthru = EDGE_SUCC (then_bb, 0);
1102   e_fallthru->count = then_bb->count;
1103
1104   gsi = gsi_last_bb (cond_bb);
1105   cost_pre_condition =
1106     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1107                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1108   cost_pre_condition =
1109     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1110                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
1111   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1112                                            NULL_TREE, NULL_TREE);
1113   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1114
1115   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1116                         "prologue_after_cost_adjust");
1117   prologue_after_cost_adjust_name =
1118     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1119
1120   gsi = gsi_last_bb (then_bb);
1121   if (stmts)
1122     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1123
1124   newphi = create_phi_node (var, bb_before_first_loop);
1125   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1126                UNKNOWN_LOCATION);
1127   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1128
1129   *first_niters = PHI_RESULT (newphi);
1130 }
1131
1132 /* Function slpeel_tree_peel_loop_to_edge.
1133
1134    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1135    that is placed on the entry (exit) edge E of LOOP. After this transformation
1136    we have two loops one after the other - first-loop iterates FIRST_NITERS
1137    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1138    If the cost model indicates that it is profitable to emit a scalar
1139    loop instead of the vector one, then the prolog (epilog) loop will iterate
1140    for the entire unchanged scalar iterations of the loop.
1141
1142    Input:
1143    - LOOP: the loop to be peeled.
1144    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1145         should be copied.
1146    - E: the exit or entry edge of LOOP.
1147         If it is the entry edge, we peel the first iterations of LOOP. In this
1148         case first-loop is LOOP, and second-loop is the newly created loop.
1149         If it is the exit edge, we peel the last iterations of LOOP. In this
1150         case, first-loop is the newly created loop, and second-loop is LOOP.
1151    - NITERS: the number of iterations that LOOP iterates.
1152    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1153    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1154         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1155         is false, the caller of this function may want to take care of this
1156         (this can be useful if we don't want new stmts added to first-loop).
1157    - TH: cost model profitability threshold of iterations for vectorization.
1158    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1159                           during versioning and hence needs to occur during
1160                           prologue generation or whether cost model check
1161                           has not occurred during prologue generation and hence
1162                           needs to occur during epilogue generation.
1163    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1164    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1165
1166
1167    Output:
1168    The function returns a pointer to the new loop-copy, or NULL if it failed
1169    to perform the transformation.
1170
1171    The function generates two if-then-else guards: one before the first loop,
1172    and the other before the second loop:
1173    The first guard is:
1174      if (FIRST_NITERS == 0) then skip the first loop,
1175      and go directly to the second loop.
1176    The second guard is:
1177      if (FIRST_NITERS == NITERS) then skip the second loop.
1178
1179    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1180    then the generated condition is combined with COND_EXPR and the
1181    statements in COND_EXPR_STMT_LIST are emitted together with it.
1182
1183    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1184    FORNOW the resulting code will not be in loop-closed-ssa form.
1185 */
1186
1187 static struct loop *
1188 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1189                                edge e, tree *first_niters,
1190                                tree niters, bool update_first_loop_count,
1191                                unsigned int th, bool check_profitability,
1192                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1193                                int bound1, int bound2)
1194 {
1195   struct loop *new_loop = NULL, *first_loop, *second_loop;
1196   edge skip_e;
1197   tree pre_condition = NULL_TREE;
1198   basic_block bb_before_second_loop, bb_after_second_loop;
1199   basic_block bb_before_first_loop;
1200   basic_block bb_between_loops;
1201   basic_block new_exit_bb;
1202   gphi_iterator gsi;
1203   edge exit_e = single_exit (loop);
1204   source_location loop_loc;
1205   /* There are many aspects to how likely the first loop is going to be executed.
1206      Without histogram we can't really do good job.  Simply set it to
1207      2/3, so the first loop is not reordered to the end of function and
1208      the hot path through stays short.  */
1209   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1210   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1211   int probability_of_second_loop;
1212
1213   if (!slpeel_can_duplicate_loop_p (loop, e))
1214     return NULL;
1215
1216   /* We might have a queued need to update virtual SSA form.  As we
1217      delete the update SSA machinery below after doing a regular
1218      incremental SSA update during loop copying make sure we don't
1219      lose that fact.
1220      ???  Needing to update virtual SSA form by renaming is unfortunate
1221      but not all of the vectorizer code inserting new loads / stores
1222      properly assigns virtual operands to those statements.  */
1223   update_ssa (TODO_update_ssa_only_virtuals);
1224
1225   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1226      in the exit bb and rename all the uses after the loop.  This simplifies
1227      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1228      (but normally loop closed SSA form doesn't require virtual PHIs to be
1229      in the same form).  Doing this early simplifies the checking what
1230      uses should be renamed.  */
1231   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1232     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1233       {
1234         gphi *phi = gsi.phi ();
1235         for (gsi = gsi_start_phis (exit_e->dest);
1236              !gsi_end_p (gsi); gsi_next (&gsi))
1237           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1238             break;
1239         if (gsi_end_p (gsi))
1240           {
1241             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1242             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1243             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1244             imm_use_iterator imm_iter;
1245             gimple *stmt;
1246             use_operand_p use_p;
1247
1248             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1249             gimple_phi_set_result (new_phi, new_vop);
1250             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1251               if (stmt != new_phi
1252                   && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1253                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1254                   SET_USE (use_p, new_vop);
1255           }
1256         break;
1257       }
1258
1259   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1260         Resulting CFG would be:
1261
1262         first_loop:
1263         do {
1264         } while ...
1265
1266         second_loop:
1267         do {
1268         } while ...
1269
1270         orig_exit_bb:
1271    */
1272
1273   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1274                                                            e)))
1275     {
1276       loop_loc = find_loop_location (loop);
1277       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1278                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1279       return NULL;
1280     }
1281
1282   if (MAY_HAVE_DEBUG_STMTS)
1283     {
1284       gcc_assert (!adjust_vec.exists ());
1285       adjust_vec.create (32);
1286     }
1287
1288   if (e == exit_e)
1289     {
1290       /* NEW_LOOP was placed after LOOP.  */
1291       first_loop = loop;
1292       second_loop = new_loop;
1293     }
1294   else
1295     {
1296       /* NEW_LOOP was placed before LOOP.  */
1297       first_loop = new_loop;
1298       second_loop = loop;
1299     }
1300
1301   /* 2.  Add the guard code in one of the following ways:
1302
1303      2.a Add the guard that controls whether the first loop is executed.
1304          This occurs when this function is invoked for prologue or epilogue
1305          generation and when the cost model check can be done at compile time.
1306
1307          Resulting CFG would be:
1308
1309          bb_before_first_loop:
1310          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1311                                 GOTO first-loop
1312
1313          first_loop:
1314          do {
1315          } while ...
1316
1317          bb_before_second_loop:
1318
1319          second_loop:
1320          do {
1321          } while ...
1322
1323          orig_exit_bb:
1324
1325      2.b Add the cost model check that allows the prologue
1326          to iterate for the entire unchanged scalar
1327          iterations of the loop in the event that the cost
1328          model indicates that the scalar loop is more
1329          profitable than the vector one. This occurs when
1330          this function is invoked for prologue generation
1331          and the cost model check needs to be done at run
1332          time.
1333
1334          Resulting CFG after prologue peeling would be:
1335
1336          if (scalar_loop_iterations <= th)
1337            FIRST_NITERS = scalar_loop_iterations
1338
1339          bb_before_first_loop:
1340          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1341                                 GOTO first-loop
1342
1343          first_loop:
1344          do {
1345          } while ...
1346
1347          bb_before_second_loop:
1348
1349          second_loop:
1350          do {
1351          } while ...
1352
1353          orig_exit_bb:
1354
1355      2.c Add the cost model check that allows the epilogue
1356          to iterate for the entire unchanged scalar
1357          iterations of the loop in the event that the cost
1358          model indicates that the scalar loop is more
1359          profitable than the vector one. This occurs when
1360          this function is invoked for epilogue generation
1361          and the cost model check needs to be done at run
1362          time.  This check is combined with any pre-existing
1363          check in COND_EXPR to avoid versioning.
1364
1365          Resulting CFG after prologue peeling would be:
1366
1367          bb_before_first_loop:
1368          if ((scalar_loop_iterations <= th)
1369              ||
1370              FIRST_NITERS == 0) GOTO bb_before_second_loop
1371                                 GOTO first-loop
1372
1373          first_loop:
1374          do {
1375          } while ...
1376
1377          bb_before_second_loop:
1378
1379          second_loop:
1380          do {
1381          } while ...
1382
1383          orig_exit_bb:
1384   */
1385
1386   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1387   /* Loop copying insterted a forwarder block for us here.  */
1388   bb_before_second_loop = single_exit (first_loop)->dest;
1389
1390   probability_of_second_loop = (inverse_probability (first_guard_probability)
1391                                 + combine_probabilities (second_guard_probability,
1392                                                          first_guard_probability));
1393   /* Theoretically preheader edge of first loop and exit edge should have
1394      same frequencies.  Loop exit probablities are however easy to get wrong.
1395      It is safer to copy value from original loop entry.  */
1396   bb_before_second_loop->frequency
1397      = combine_probabilities (bb_before_first_loop->frequency,
1398                               probability_of_second_loop);
1399   bb_before_second_loop->count
1400      = apply_probability (bb_before_first_loop->count,
1401                           probability_of_second_loop);
1402   single_succ_edge (bb_before_second_loop)->count
1403      = bb_before_second_loop->count;
1404
1405   /* Epilogue peeling.  */
1406   if (!update_first_loop_count)
1407     {
1408       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1409       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1410       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1411       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1412         limit = limit + 1;
1413       if (check_profitability
1414           && th > limit)
1415         limit = th;
1416       pre_condition =
1417         fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1418                      build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1419       if (cond_expr)
1420         {
1421           pre_condition =
1422             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1423                          pre_condition,
1424                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1425                                       cond_expr));
1426         }
1427     }
1428
1429   /* Prologue peeling.  */
1430   else
1431     {
1432       if (check_profitability)
1433         set_prologue_iterations (bb_before_first_loop, first_niters,
1434                                  loop, th, first_guard_probability);
1435
1436       pre_condition =
1437         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1438                      build_int_cst (TREE_TYPE (*first_niters), 0));
1439     }
1440
1441   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1442                                   cond_expr_stmt_list,
1443                                   bb_before_second_loop, bb_before_first_loop,
1444                                   inverse_probability (first_guard_probability));
1445   scale_loop_profile (first_loop, first_guard_probability,
1446                       check_profitability && (int)th > bound1 ? th : bound1);
1447   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1448                                       first_loop == new_loop,
1449                                       &new_exit_bb);
1450
1451
1452   /* 3. Add the guard that controls whether the second loop is executed.
1453         Resulting CFG would be:
1454
1455         bb_before_first_loop:
1456         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1457                                GOTO first-loop
1458
1459         first_loop:
1460         do {
1461         } while ...
1462
1463         bb_between_loops:
1464         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1465                                     GOTO bb_before_second_loop
1466
1467         bb_before_second_loop:
1468
1469         second_loop:
1470         do {
1471         } while ...
1472
1473         bb_after_second_loop:
1474
1475         orig_exit_bb:
1476    */
1477
1478   bb_between_loops = new_exit_bb;
1479   bb_after_second_loop = split_edge (single_exit (second_loop));
1480
1481   pre_condition =
1482         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1483   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1484                                   bb_after_second_loop, bb_before_first_loop,
1485                                   inverse_probability (second_guard_probability));
1486   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1487   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1488                                      second_loop == new_loop, &new_exit_bb);
1489
1490   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1491    */
1492   if (update_first_loop_count)
1493     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1494
1495   delete_update_ssa ();
1496
1497   adjust_vec_debug_stmts ();
1498
1499   return new_loop;
1500 }
1501
1502 /* Function vect_get_loop_location.
1503
1504    Extract the location of the loop in the source code.
1505    If the loop is not well formed for vectorization, an estimated
1506    location is calculated.
1507    Return the loop location if succeed and NULL if not.  */
1508
1509 source_location
1510 find_loop_location (struct loop *loop)
1511 {
1512   gimple *stmt = NULL;
1513   basic_block bb;
1514   gimple_stmt_iterator si;
1515
1516   if (!loop)
1517     return UNKNOWN_LOCATION;
1518
1519   stmt = get_loop_exit_condition (loop);
1520
1521   if (stmt
1522       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1523     return gimple_location (stmt);
1524
1525   /* If we got here the loop is probably not "well formed",
1526      try to estimate the loop location */
1527
1528   if (!loop->header)
1529     return UNKNOWN_LOCATION;
1530
1531   bb = loop->header;
1532
1533   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1534     {
1535       stmt = gsi_stmt (si);
1536       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1537         return gimple_location (stmt);
1538     }
1539
1540   return UNKNOWN_LOCATION;
1541 }
1542
1543
1544 /* Function vect_can_advance_ivs_p
1545
1546    In case the number of iterations that LOOP iterates is unknown at compile
1547    time, an epilog loop will be generated, and the loop induction variables
1548    (IVs) will be "advanced" to the value they are supposed to take just before
1549    the epilog loop.  Here we check that the access function of the loop IVs
1550    and the expression that represents the loop bound are simple enough.
1551    These restrictions will be relaxed in the future.  */
1552
1553 bool
1554 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1555 {
1556   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1557   basic_block bb = loop->header;
1558   gimple *phi;
1559   gphi_iterator gsi;
1560
1561   /* Analyze phi functions of the loop header.  */
1562
1563   if (dump_enabled_p ())
1564     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1565   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1566     {
1567       tree evolution_part;
1568
1569       phi = gsi.phi ();
1570       if (dump_enabled_p ())
1571         {
1572           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1573           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1574         }
1575
1576       /* Skip virtual phi's. The data dependences that are associated with
1577          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1578
1579       if (virtual_operand_p (PHI_RESULT (phi)))
1580         {
1581           if (dump_enabled_p ())
1582             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1583                              "virtual phi. skip.\n");
1584           continue;
1585         }
1586
1587       /* Skip reduction phis.  */
1588
1589       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1590         {
1591           if (dump_enabled_p ())
1592             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1593                              "reduc phi. skip.\n");
1594           continue;
1595         }
1596
1597       /* Analyze the evolution function.  */
1598
1599       evolution_part
1600         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1601       if (evolution_part == NULL_TREE)
1602         {
1603           if (dump_enabled_p ())
1604             dump_printf (MSG_MISSED_OPTIMIZATION,
1605                          "No access function or evolution.\n");
1606           return false;
1607         }
1608
1609       /* FORNOW: We do not transform initial conditions of IVs
1610          which evolution functions are not invariants in the loop.  */
1611
1612       if (!expr_invariant_in_loop_p (loop, evolution_part))
1613         {
1614           if (dump_enabled_p ())
1615             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1616                              "evolution not invariant in loop.\n");
1617           return false;
1618         }
1619
1620       /* FORNOW: We do not transform initial conditions of IVs
1621          which evolution functions are a polynomial of degree >= 2.  */
1622
1623       if (tree_is_chrec (evolution_part))
1624         {
1625           if (dump_enabled_p ())
1626             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1627                              "evolution is chrec.\n");
1628           return false;
1629         }
1630     }
1631
1632   return true;
1633 }
1634
1635
1636 /*   Function vect_update_ivs_after_vectorizer.
1637
1638      "Advance" the induction variables of LOOP to the value they should take
1639      after the execution of LOOP.  This is currently necessary because the
1640      vectorizer does not handle induction variables that are used after the
1641      loop.  Such a situation occurs when the last iterations of LOOP are
1642      peeled, because:
1643      1. We introduced new uses after LOOP for IVs that were not originally used
1644         after LOOP: the IVs of LOOP are now used by an epilog loop.
1645      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1646         times, whereas the loop IVs should be bumped N times.
1647
1648      Input:
1649      - LOOP - a loop that is going to be vectorized. The last few iterations
1650               of LOOP were peeled.
1651      - NITERS - the number of iterations that LOOP executes (before it is
1652                 vectorized). i.e, the number of times the ivs should be bumped.
1653      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1654                   coming out from LOOP on which there are uses of the LOOP ivs
1655                   (this is the path from LOOP->exit to epilog_loop->preheader).
1656
1657                   The new definitions of the ivs are placed in LOOP->exit.
1658                   The phi args associated with the edge UPDATE_E in the bb
1659                   UPDATE_E->dest are updated accordingly.
1660
1661      Assumption 1: Like the rest of the vectorizer, this function assumes
1662      a single loop exit that has a single predecessor.
1663
1664      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1665      organized in the same order.
1666
1667      Assumption 3: The access function of the ivs is simple enough (see
1668      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1669
1670      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1671      coming out of LOOP on which the ivs of LOOP are used (this is the path
1672      that leads to the epilog loop; other paths skip the epilog loop).  This
1673      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1674      needs to have its phis updated.
1675  */
1676
1677 static void
1678 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1679                                   edge update_e)
1680 {
1681   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1682   basic_block exit_bb = single_exit (loop)->dest;
1683   gphi *phi, *phi1;
1684   gphi_iterator gsi, gsi1;
1685   basic_block update_bb = update_e->dest;
1686
1687   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1688
1689   /* Make sure there exists a single-predecessor exit bb:  */
1690   gcc_assert (single_pred_p (exit_bb));
1691
1692   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1693        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1694        gsi_next (&gsi), gsi_next (&gsi1))
1695     {
1696       tree init_expr;
1697       tree step_expr, off;
1698       tree type;
1699       tree var, ni, ni_name;
1700       gimple_stmt_iterator last_gsi;
1701       stmt_vec_info stmt_info;
1702
1703       phi = gsi.phi ();
1704       phi1 = gsi1.phi ();
1705       if (dump_enabled_p ())
1706         {
1707           dump_printf_loc (MSG_NOTE, vect_location,
1708                            "vect_update_ivs_after_vectorizer: phi: ");
1709           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1710         }
1711
1712       /* Skip virtual phi's.  */
1713       if (virtual_operand_p (PHI_RESULT (phi)))
1714         {
1715           if (dump_enabled_p ())
1716             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1717                              "virtual phi. skip.\n");
1718           continue;
1719         }
1720
1721       /* Skip reduction phis.  */
1722       stmt_info = vinfo_for_stmt (phi);
1723       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1724           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1725         {
1726           if (dump_enabled_p ())
1727             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1728                              "reduc phi. skip.\n");
1729           continue;
1730         }
1731
1732       type = TREE_TYPE (gimple_phi_result (phi));
1733       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1734       step_expr = unshare_expr (step_expr);
1735
1736       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1737          of degree >= 2 or exponential.  */
1738       gcc_assert (!tree_is_chrec (step_expr));
1739
1740       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1741
1742       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1743                          fold_convert (TREE_TYPE (step_expr), niters),
1744                          step_expr);
1745       if (POINTER_TYPE_P (type))
1746         ni = fold_build_pointer_plus (init_expr, off);
1747       else
1748         ni = fold_build2 (PLUS_EXPR, type,
1749                           init_expr, fold_convert (type, off));
1750
1751       var = create_tmp_var (type, "tmp");
1752
1753       last_gsi = gsi_last_bb (exit_bb);
1754       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1755                                           true, GSI_SAME_STMT);
1756
1757       /* Fix phi expressions in the successor bb.  */
1758       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1759     }
1760 }
1761
1762 /* Function vect_do_peeling_for_loop_bound
1763
1764    Peel the last iterations of the loop represented by LOOP_VINFO.
1765    The peeled iterations form a new epilog loop.  Given that the loop now
1766    iterates NITERS times, the new epilog loop iterates
1767    NITERS % VECTORIZATION_FACTOR times.
1768
1769    If CHECK_PROFITABILITY is 1 then profitability check is generated
1770    using TH as a cost model profitability threshold of iterations for
1771    vectorization.
1772
1773    The original loop will later be made to iterate
1774    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1775
1776    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1777    test.  */
1778
1779 void
1780 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1781                                 tree ni_name, tree ratio_mult_vf_name,
1782                                 unsigned int th, bool check_profitability)
1783 {
1784   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1785   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1786   struct loop *new_loop;
1787   edge update_e;
1788   basic_block preheader;
1789   int loop_num;
1790   int max_iter;
1791   tree cond_expr = NULL_TREE;
1792   gimple_seq cond_expr_stmt_list = NULL;
1793
1794   if (dump_enabled_p ())
1795     dump_printf_loc (MSG_NOTE, vect_location,
1796                      "=== vect_do_peeling_for_loop_bound ===\n");
1797
1798   initialize_original_copy_tables ();
1799
1800   loop_num  = loop->num;
1801
1802   new_loop
1803     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1804                                      &ratio_mult_vf_name, ni_name, false,
1805                                      th, check_profitability,
1806                                      cond_expr, cond_expr_stmt_list,
1807                                      0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1808   gcc_assert (new_loop);
1809   gcc_assert (loop_num == loop->num);
1810   slpeel_checking_verify_cfg_after_peeling (loop, new_loop);
1811
1812   /* A guard that controls whether the new_loop is to be executed or skipped
1813      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1814      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1815      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1816      is on the path where the LOOP IVs are used and need to be updated.  */
1817
1818   preheader = loop_preheader_edge (new_loop)->src;
1819   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1820     update_e = EDGE_PRED (preheader, 0);
1821   else
1822     update_e = EDGE_PRED (preheader, 1);
1823
1824   /* Update IVs of original loop as if they were advanced
1825      by ratio_mult_vf_name steps.  */
1826   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1827
1828   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1829      and this means N-2 loopback edge executions.
1830
1831      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1832      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1833   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1834               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1835               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1836   if (check_profitability)
1837     max_iter = MAX (max_iter, (int) th - 1);
1838   record_niter_bound (new_loop, max_iter, false, true);
1839   dump_printf (MSG_NOTE,
1840                "Setting upper bound of nb iterations for epilogue "
1841                "loop to %d\n", max_iter);
1842
1843   /* After peeling we have to reset scalar evolution analyzer.  */
1844   scev_reset ();
1845
1846   free_original_copy_tables ();
1847 }
1848
1849
1850 /* Function vect_gen_niters_for_prolog_loop
1851
1852    Set the number of iterations for the loop represented by LOOP_VINFO
1853    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1854    and the misalignment of DR - the data reference recorded in
1855    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1856    this loop, the data reference DR will refer to an aligned location.
1857
1858    The following computation is generated:
1859
1860    If the misalignment of DR is known at compile time:
1861      addr_mis = int mis = DR_MISALIGNMENT (dr);
1862    Else, compute address misalignment in bytes:
1863      addr_mis = addr & (vectype_align - 1)
1864
1865    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1866
1867    (elem_size = element type size; an element is the scalar element whose type
1868    is the inner type of the vectype)
1869
1870    When the step of the data-ref in the loop is not 1 (as in interleaved data
1871    and SLP), the number of iterations of the prolog must be divided by the step
1872    (which is equal to the size of interleaved group).
1873
1874    The above formulas assume that VF == number of elements in the vector. This
1875    may not hold when there are multiple-types in the loop.
1876    In this case, for some data-references in the loop the VF does not represent
1877    the number of elements that fit in the vector.  Therefore, instead of VF we
1878    use TYPE_VECTOR_SUBPARTS.  */
1879
1880 static tree
1881 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1882 {
1883   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1884   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1885   tree var;
1886   gimple_seq stmts;
1887   tree iters, iters_name;
1888   edge pe;
1889   basic_block new_bb;
1890   gimple *dr_stmt = DR_STMT (dr);
1891   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1892   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1893   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1894   tree niters_type = TREE_TYPE (loop_niters);
1895   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1896
1897   pe = loop_preheader_edge (loop);
1898
1899   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1900     {
1901       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1902
1903       if (dump_enabled_p ())
1904         dump_printf_loc (MSG_NOTE, vect_location,
1905                          "known peeling = %d.\n", npeel);
1906
1907       iters = build_int_cst (niters_type, npeel);
1908       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1909     }
1910   else
1911     {
1912       gimple_seq new_stmts = NULL;
1913       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1914       tree offset = negative
1915           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1916       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1917                                                 &new_stmts, offset, loop);
1918       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1919       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1920       HOST_WIDE_INT elem_size =
1921                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1922       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1923       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1924       tree nelements_tree = build_int_cst (type, nelements);
1925       tree byte_misalign;
1926       tree elem_misalign;
1927
1928       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1929       gcc_assert (!new_bb);
1930
1931       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1932       byte_misalign =
1933         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1934                      vectype_align_minus_1);
1935
1936       /* Create:  elem_misalign = byte_misalign / element_size  */
1937       elem_misalign =
1938         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1939
1940       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1941       if (negative)
1942         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1943       else
1944         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1945       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1946       iters = fold_convert (niters_type, iters);
1947       *bound = nelements;
1948     }
1949
1950   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1951   /* If the loop bound is known at compile time we already verified that it is
1952      greater than vf; since the misalignment ('iters') is at most vf, there's
1953      no need to generate the MIN_EXPR in this case.  */
1954   if (TREE_CODE (loop_niters) != INTEGER_CST)
1955     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1956
1957   if (dump_enabled_p ())
1958     {
1959       dump_printf_loc (MSG_NOTE, vect_location,
1960                        "niters for prolog loop: ");
1961       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1962       dump_printf (MSG_NOTE, "\n");
1963     }
1964
1965   var = create_tmp_var (niters_type, "prolog_loop_niters");
1966   stmts = NULL;
1967   iters_name = force_gimple_operand (iters, &stmts, false, var);
1968
1969   /* Insert stmt on loop preheader edge.  */
1970   if (stmts)
1971     {
1972       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1973       gcc_assert (!new_bb);
1974     }
1975
1976   return iters_name;
1977 }
1978
1979
1980 /* Function vect_update_init_of_dr
1981
1982    NITERS iterations were peeled from LOOP.  DR represents a data reference
1983    in LOOP.  This function updates the information recorded in DR to
1984    account for the fact that the first NITERS iterations had already been
1985    executed.  Specifically, it updates the OFFSET field of DR.  */
1986
1987 static void
1988 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1989 {
1990   tree offset = DR_OFFSET (dr);
1991
1992   niters = fold_build2 (MULT_EXPR, sizetype,
1993                         fold_convert (sizetype, niters),
1994                         fold_convert (sizetype, DR_STEP (dr)));
1995   offset = fold_build2 (PLUS_EXPR, sizetype,
1996                         fold_convert (sizetype, offset), niters);
1997   DR_OFFSET (dr) = offset;
1998 }
1999
2000
2001 /* Function vect_update_inits_of_drs
2002
2003    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2004    This function updates the information recorded for the data references in
2005    the loop to account for the fact that the first NITERS iterations had
2006    already been executed.  Specifically, it updates the initial_condition of
2007    the access_function of all the data_references in the loop.  */
2008
2009 static void
2010 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2011 {
2012   unsigned int i;
2013   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2014   struct data_reference *dr;
2015
2016  if (dump_enabled_p ())
2017     dump_printf_loc (MSG_NOTE, vect_location,
2018                      "=== vect_update_inits_of_dr ===\n");
2019
2020   FOR_EACH_VEC_ELT (datarefs, i, dr)
2021     vect_update_init_of_dr (dr, niters);
2022 }
2023
2024
2025 /* Function vect_do_peeling_for_alignment
2026
2027    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2028    'niters' is set to the misalignment of one of the data references in the
2029    loop, thereby forcing it to refer to an aligned location at the beginning
2030    of the execution of this loop.  The data reference for which we are
2031    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.
2032
2033    If CHECK_PROFITABILITY is 1 then profitability check is generated
2034    using TH as a cost model profitability threshold of iterations for
2035    vectorization.  */
2036
2037 void
2038 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2039                                unsigned int th, bool check_profitability)
2040 {
2041   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2042   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2043   tree niters_of_prolog_loop;
2044   tree wide_prolog_niters;
2045   struct loop *new_loop;
2046   int max_iter;
2047   int bound = 0;
2048
2049   if (dump_enabled_p ())
2050     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2051                      "loop peeled for vectorization to enhance"
2052                      " alignment\n");
2053
2054   initialize_original_copy_tables ();
2055
2056   gimple_seq stmts = NULL;
2057   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2058   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2059                                                            ni_name,
2060                                                            &bound);
2061
2062   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2063   new_loop =
2064     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2065                                    loop_preheader_edge (loop),
2066                                    &niters_of_prolog_loop, ni_name, true,
2067                                    th, check_profitability, NULL_TREE, NULL,
2068                                    bound, 0);
2069
2070   gcc_assert (new_loop);
2071   slpeel_checking_verify_cfg_after_peeling (new_loop, loop);
2072   /* For vectorization factor N, we need to copy at most N-1 values
2073      for alignment and this means N-2 loopback edge executions.  */
2074   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2075   if (check_profitability)
2076     max_iter = MAX (max_iter, (int) th - 1);
2077   record_niter_bound (new_loop, max_iter, false, true);
2078   dump_printf (MSG_NOTE,
2079                "Setting upper bound of nb iterations for prologue "
2080                "loop to %d\n", max_iter);
2081
2082   /* Update number of times loop executes.  */
2083   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2084                 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2085   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2086                 TREE_TYPE (ni_name),
2087                 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2088
2089   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2090     wide_prolog_niters = niters_of_prolog_loop;
2091   else
2092     {
2093       gimple_seq seq = NULL;
2094       edge pe = loop_preheader_edge (loop);
2095       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2096       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2097       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2098                                                  var);
2099       if (seq)
2100         {
2101           /* Insert stmt on loop preheader edge.  */
2102           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2103           gcc_assert (!new_bb);
2104         }
2105     }
2106
2107   /* Update the init conditions of the access functions of all data refs.  */
2108   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2109
2110   /* After peeling we have to reset scalar evolution analyzer.  */
2111   scev_reset ();
2112
2113   free_original_copy_tables ();
2114 }
2115
2116 /* Function vect_create_cond_for_niters_checks.
2117
2118    Create a conditional expression that represents the run-time checks for
2119    loop's niter.  The loop is guaranteed to to terminate if the run-time
2120    checks hold.
2121
2122    Input:
2123    COND_EXPR  - input conditional expression.  New conditions will be chained
2124                 with logical AND operation.  If it is NULL, then the function
2125                 is used to return the number of alias checks.
2126    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2127                 to be checked.
2128
2129    Output:
2130    COND_EXPR - conditional expression.
2131
2132    The returned COND_EXPR is the conditional expression to be used in the
2133    if statement that controls which version of the loop gets executed at
2134    runtime.  */
2135
2136 static void
2137 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2138 {
2139   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2140
2141   if (*cond_expr)
2142     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2143                               *cond_expr, part_cond_expr);
2144   else
2145     *cond_expr = part_cond_expr;
2146 }
2147
2148 /* Function vect_create_cond_for_align_checks.
2149
2150    Create a conditional expression that represents the alignment checks for
2151    all of data references (array element references) whose alignment must be
2152    checked at runtime.
2153
2154    Input:
2155    COND_EXPR  - input conditional expression.  New conditions will be chained
2156                 with logical AND operation.
2157    LOOP_VINFO - two fields of the loop information are used.
2158                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2159                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2160
2161    Output:
2162    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2163                          expression.
2164    The returned value is the conditional expression to be used in the if
2165    statement that controls which version of the loop gets executed at runtime.
2166
2167    The algorithm makes two assumptions:
2168      1) The number of bytes "n" in a vector is a power of 2.
2169      2) An address "a" is aligned if a%n is zero and that this
2170         test can be done as a&(n-1) == 0.  For example, for 16
2171         byte vectors the test is a&0xf == 0.  */
2172
2173 static void
2174 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2175                                    tree *cond_expr,
2176                                    gimple_seq *cond_expr_stmt_list)
2177 {
2178   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2179   vec<gimple *> may_misalign_stmts
2180     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2181   gimple *ref_stmt;
2182   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2183   tree mask_cst;
2184   unsigned int i;
2185   tree int_ptrsize_type;
2186   char tmp_name[20];
2187   tree or_tmp_name = NULL_TREE;
2188   tree and_tmp_name;
2189   gimple *and_stmt;
2190   tree ptrsize_zero;
2191   tree part_cond_expr;
2192
2193   /* Check that mask is one less than a power of 2, i.e., mask is
2194      all zeros followed by all ones.  */
2195   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2196
2197   int_ptrsize_type = signed_type_for (ptr_type_node);
2198
2199   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2200      of the first vector of the i'th data reference. */
2201
2202   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2203     {
2204       gimple_seq new_stmt_list = NULL;
2205       tree addr_base;
2206       tree addr_tmp_name;
2207       tree new_or_tmp_name;
2208       gimple *addr_stmt, *or_stmt;
2209       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2210       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2211       bool negative = tree_int_cst_compare
2212         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2213       tree offset = negative
2214         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
2215
2216       /* create: addr_tmp = (int)(address_of_first_vector) */
2217       addr_base =
2218         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2219                                               offset, loop);
2220       if (new_stmt_list != NULL)
2221         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2222
2223       sprintf (tmp_name, "addr2int%d", i);
2224       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2225       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2226       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2227
2228       /* The addresses are OR together.  */
2229
2230       if (or_tmp_name != NULL_TREE)
2231         {
2232           /* create: or_tmp = or_tmp | addr_tmp */
2233           sprintf (tmp_name, "orptrs%d", i);
2234           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2235           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2236                                          or_tmp_name, addr_tmp_name);
2237           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2238           or_tmp_name = new_or_tmp_name;
2239         }
2240       else
2241         or_tmp_name = addr_tmp_name;
2242
2243     } /* end for i */
2244
2245   mask_cst = build_int_cst (int_ptrsize_type, mask);
2246
2247   /* create: and_tmp = or_tmp & mask  */
2248   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2249
2250   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2251                                   or_tmp_name, mask_cst);
2252   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2253
2254   /* Make and_tmp the left operand of the conditional test against zero.
2255      if and_tmp has a nonzero bit then some address is unaligned.  */
2256   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2257   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2258                                 and_tmp_name, ptrsize_zero);
2259   if (*cond_expr)
2260     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2261                               *cond_expr, part_cond_expr);
2262   else
2263     *cond_expr = part_cond_expr;
2264 }
2265
2266 /* Function vect_create_cond_for_alias_checks.
2267
2268    Create a conditional expression that represents the run-time checks for
2269    overlapping of address ranges represented by a list of data references
2270    relations passed as input.
2271
2272    Input:
2273    COND_EXPR  - input conditional expression.  New conditions will be chained
2274                 with logical AND operation.  If it is NULL, then the function
2275                 is used to return the number of alias checks.
2276    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2277                 to be checked.
2278
2279    Output:
2280    COND_EXPR - conditional expression.
2281
2282    The returned COND_EXPR is the conditional expression to be used in the if
2283    statement that controls which version of the loop gets executed at runtime.
2284 */
2285
2286 void
2287 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2288 {
2289   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2290     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2291   tree part_cond_expr;
2292
2293   /* Create expression
2294      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2295      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2296      &&
2297      ...
2298      &&
2299      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2300      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2301
2302   if (comp_alias_ddrs.is_empty ())
2303     return;
2304
2305   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2306     {
2307       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2308       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2309       tree segment_length_a = dr_a.seg_len;
2310       tree segment_length_b = dr_b.seg_len;
2311       tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2312       tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2313       tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2314
2315       offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2316                               offset_a, DR_INIT (dr_a.dr));
2317       offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2318                               offset_b, DR_INIT (dr_b.dr));
2319       addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2320       addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2321
2322       if (dump_enabled_p ())
2323         {
2324           dump_printf_loc (MSG_NOTE, vect_location,
2325                            "create runtime check for data references ");
2326           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2327           dump_printf (MSG_NOTE, " and ");
2328           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2329           dump_printf (MSG_NOTE, "\n");
2330         }
2331
2332       tree seg_a_min = addr_base_a;
2333       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2334       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2335          bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2336          [a, a+12) */
2337       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2338         {
2339           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2340           seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2341           seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2342         }
2343
2344       tree seg_b_min = addr_base_b;
2345       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2346       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2347         {
2348           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2349           seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2350           seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2351         }
2352
2353       part_cond_expr =
2354         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2355           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2356           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2357
2358       if (*cond_expr)
2359         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2360                                   *cond_expr, part_cond_expr);
2361       else
2362         *cond_expr = part_cond_expr;
2363     }
2364
2365   if (dump_enabled_p ())
2366     dump_printf_loc (MSG_NOTE, vect_location,
2367                      "created %u versioning for alias checks.\n",
2368                      comp_alias_ddrs.length ());
2369 }
2370
2371
2372 /* Function vect_loop_versioning.
2373
2374    If the loop has data references that may or may not be aligned or/and
2375    has data reference relations whose independence was not proven then
2376    two versions of the loop need to be generated, one which is vectorized
2377    and one which isn't.  A test is then generated to control which of the
2378    loops is executed.  The test checks for the alignment of all of the
2379    data references that may or may not be aligned.  An additional
2380    sequence of runtime tests is generated for each pairs of DDRs whose
2381    independence was not proven.  The vectorized version of loop is
2382    executed only if both alias and alignment tests are passed.
2383
2384    The test generated to check which version of loop is executed
2385    is modified to also check for profitability as indicated by the
2386    cost model threshold TH.
2387
2388    The versioning precondition(s) are placed in *COND_EXPR and
2389    *COND_EXPR_STMT_LIST.  */
2390
2391 void
2392 vect_loop_versioning (loop_vec_info loop_vinfo,
2393                       unsigned int th, bool check_profitability)
2394 {
2395   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2396   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2397   basic_block condition_bb;
2398   gphi_iterator gsi;
2399   gimple_stmt_iterator cond_exp_gsi;
2400   basic_block merge_bb;
2401   basic_block new_exit_bb;
2402   edge new_exit_e, e;
2403   gphi *orig_phi, *new_phi;
2404   tree cond_expr = NULL_TREE;
2405   gimple_seq cond_expr_stmt_list = NULL;
2406   tree arg;
2407   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2408   gimple_seq gimplify_stmt_list = NULL;
2409   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2410   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2411   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2412   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2413
2414   if (check_profitability)
2415     cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2416                              build_int_cst (TREE_TYPE (scalar_loop_iters),
2417                                                        th));
2418
2419   if (version_niter)
2420     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2421
2422   if (cond_expr)
2423     cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2424                                         is_gimple_condexpr, NULL_TREE);
2425
2426   if (version_align)
2427     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2428                                        &cond_expr_stmt_list);
2429
2430   if (version_alias)
2431     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2432
2433   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2434                                       is_gimple_condexpr, NULL_TREE);
2435   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2436
2437   initialize_original_copy_tables ();
2438   if (scalar_loop)
2439     {
2440       edge scalar_e;
2441       basic_block preheader, scalar_preheader;
2442
2443       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2444          scale LOOP's frequencies instead.  */
2445       nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2446                             REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2447       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2448       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2449          while we need to move it above LOOP's preheader.  */
2450       e = loop_preheader_edge (loop);
2451       scalar_e = loop_preheader_edge (scalar_loop);
2452       gcc_assert (empty_block_p (e->src)
2453                   && single_pred_p (e->src));
2454       gcc_assert (empty_block_p (scalar_e->src)
2455                   && single_pred_p (scalar_e->src));
2456       gcc_assert (single_pred_p (condition_bb));
2457       preheader = e->src;
2458       scalar_preheader = scalar_e->src;
2459       scalar_e = find_edge (condition_bb, scalar_preheader);
2460       e = single_pred_edge (preheader);
2461       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2462                                       scalar_preheader);
2463       redirect_edge_and_branch_force (scalar_e, preheader);
2464       redirect_edge_and_branch_force (e, condition_bb);
2465       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2466                                single_pred (condition_bb));
2467       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2468                                single_pred (scalar_preheader));
2469       set_immediate_dominator (CDI_DOMINATORS, preheader,
2470                                condition_bb);
2471     }
2472   else
2473     nloop = loop_version (loop, cond_expr, &condition_bb,
2474                           prob, prob, REG_BR_PROB_BASE - prob, true);
2475
2476   if (version_niter)
2477     {
2478       /* The versioned loop could be infinite, we need to clear existing
2479          niter information which is copied from the original loop.  */
2480       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2481       vect_free_loop_info_assumptions (nloop);
2482       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
2483       loop_constraint_set (loop, LOOP_C_INFINITE);
2484     }
2485
2486   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2487       && dump_enabled_p ())
2488     {
2489       if (version_alias)
2490         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2491                          "loop versioned for vectorization because of "
2492                          "possible aliasing\n");
2493       if (version_align)
2494         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2495                          "loop versioned for vectorization to enhance "
2496                          "alignment\n");
2497
2498     }
2499   free_original_copy_tables ();
2500
2501   /* Loop versioning violates an assumption we try to maintain during
2502      vectorization - that the loop exit block has a single predecessor.
2503      After versioning, the exit block of both loop versions is the same
2504      basic block (i.e. it has two predecessors). Just in order to simplify
2505      following transformations in the vectorizer, we fix this situation
2506      here by adding a new (empty) block on the exit-edge of the loop,
2507      with the proper loop-exit phis to maintain loop-closed-form.
2508      If loop versioning wasn't done from loop, but scalar_loop instead,
2509      merge_bb will have already just a single successor.  */
2510
2511   merge_bb = single_exit (loop)->dest;
2512   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2513     {
2514       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2515       new_exit_bb = split_edge (single_exit (loop));
2516       new_exit_e = single_exit (loop);
2517       e = EDGE_SUCC (new_exit_bb, 0);
2518
2519       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2520         {
2521           tree new_res;
2522           orig_phi = gsi.phi ();
2523           new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2524           new_phi = create_phi_node (new_res, new_exit_bb);
2525           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2526           add_phi_arg (new_phi, arg, new_exit_e,
2527                        gimple_phi_arg_location_from_edge (orig_phi, e));
2528           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2529         }
2530     }
2531
2532   /* End loop-exit-fixes after versioning.  */
2533
2534   if (cond_expr_stmt_list)
2535     {
2536       cond_exp_gsi = gsi_last_bb (condition_bb);
2537       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2538                              GSI_SAME_STMT);
2539     }
2540   update_ssa (TODO_update_ssa);
2541 }