gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "hash-set.h"
  28 #include "machmode.h"
  29 #include "vec.h"
  30 #include "double-int.h"
  31 #include "input.h"
  32 #include "alias.h"
  33 #include "symtab.h"
  34 #include "wide-int.h"
  35 #include "inchash.h"
  36 #include "tree.h"
  37 #include "fold-const.h"
  38 #include "predict.h"
  39 #include "hard-reg-set.h"
  40 #include "input.h"
  41 #include "function.h"
  42 #include "dominance.h"
  43 #include "cfg.h"
  44 #include "cfganal.h"
  45 #include "basic-block.h"
  46 #include "gimple-pretty-print.h"
  47 #include "tree-ssa-alias.h"
  48 #include "internal-fn.h"
  49 #include "gimple-expr.h"
  50 #include "is-a.h"
  51 #include "gimple.h"
  52 #include "gimplify.h"
  53 #include "gimple-iterator.h"
  54 #include "gimplify-me.h"
  55 #include "gimple-ssa.h"
  56 #include "tree-cfg.h"
  57 #include "tree-phinodes.h"
  58 #include "ssa-iterators.h"
  59 #include "stringpool.h"
  60 #include "tree-ssanames.h"
  61 #include "tree-ssa-loop-manip.h"
  62 #include "tree-into-ssa.h"
  63 #include "tree-ssa.h"
  64 #include "tree-pass.h"
  65 #include "cfgloop.h"
  66 #include "diagnostic-core.h"
  67 #include "tree-scalar-evolution.h"
  68 #include "tree-vectorizer.h"
  69 #include "langhooks.h"
  70
  71 /*************************************************************************
  72   Simple Loop Peeling Utilities
  73
  74   Utilities to support loop peeling for vectorization purposes.
  75  *************************************************************************/
  76
  77
  78 /* Renames the use *OP_P.  */
  79
  80 static void
  81 rename_use_op (use_operand_p op_p)
  82 {
  83   tree new_name;
  84
  85   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  86     return;
  87
  88   new_name = get_current_def (USE_FROM_PTR (op_p));
  89
  90   /* Something defined outside of the loop.  */
  91   if (!new_name)
  92     return;
  93
  94   /* An ordinary ssa name defined in the loop.  */
  95
  96   SET_USE (op_p, new_name);
  97 }
  98
  99
 100 /* Renames the variables in basic block BB.  */
 101
 102 static void
 103 rename_variables_in_bb (basic_block bb)
 104 {
 105   gimple stmt;
 106   use_operand_p use_p;
 107   ssa_op_iter iter;
 108   edge e;
 109   edge_iterator ei;
 110   struct loop *loop = bb->loop_father;
 111
 112   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
 113        gsi_next (&gsi))
 114     {
 115       stmt = gsi_stmt (gsi);
 116       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 117         rename_use_op (use_p);
 118     }
 119
 120   FOR_EACH_EDGE (e, ei, bb->preds)
 121     {
 122       if (!flow_bb_inside_loop_p (loop, e->src))
 123         continue;
 124       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 125            gsi_next (&gsi))
 126         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 127     }
 128 }
 129
 130
 131 typedef struct
 132 {
 133   tree from, to;
 134   basic_block bb;
 135 } adjust_info;
 136
 137 /* A stack of values to be adjusted in debug stmts.  We have to
 138    process them LIFO, so that the closest substitution applies.  If we
 139    processed them FIFO, without the stack, we might substitute uses
 140    with a PHI DEF that would soon become non-dominant, and when we got
 141    to the suitable one, it wouldn't have anything to substitute any
 142    more.  */
 143 static vec<adjust_info, va_heap> adjust_vec;
 144
 145 /* Adjust any debug stmts that referenced AI->from values to use the
 146    loop-closed AI->to, if the references are dominated by AI->bb and
 147    not by the definition of AI->from.  */
 148
 149 static void
 150 adjust_debug_stmts_now (adjust_info *ai)
 151 {
 152   basic_block bbphi = ai->bb;
 153   tree orig_def = ai->from;
 154   tree new_def = ai->to;
 155   imm_use_iterator imm_iter;
 156   gimple stmt;
 157   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 158
 159   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 160
 161   /* Adjust any debug stmts that held onto non-loop-closed
 162      references.  */
 163   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 164     {
 165       use_operand_p use_p;
 166       basic_block bbuse;
 167
 168       if (!is_gimple_debug (stmt))
 169         continue;
 170
 171       gcc_assert (gimple_debug_bind_p (stmt));
 172
 173       bbuse = gimple_bb (stmt);
 174
 175       if ((bbuse == bbphi
 176            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 177           && !(bbuse == bbdef
 178                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 179         {
 180           if (new_def)
 181             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 182               SET_USE (use_p, new_def);
 183           else
 184             {
 185               gimple_debug_bind_reset_value (stmt);
 186               update_stmt (stmt);
 187             }
 188         }
 189     }
 190 }
 191
 192 /* Adjust debug stmts as scheduled before.  */
 193
 194 static void
 195 adjust_vec_debug_stmts (void)
 196 {
 197   if (!MAY_HAVE_DEBUG_STMTS)
 198     return;
 199
 200   gcc_assert (adjust_vec.exists ());
 201
 202   while (!adjust_vec.is_empty ())
 203     {
 204       adjust_debug_stmts_now (&adjust_vec.last ());
 205       adjust_vec.pop ();
 206     }
 207
 208   adjust_vec.release ();
 209 }
 210
 211 /* Adjust any debug stmts that referenced FROM values to use the
 212    loop-closed TO, if the references are dominated by BB and not by
 213    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 214    will be postponed until adjust_vec_debug_stmts is called.  */
 215
 216 static void
 217 adjust_debug_stmts (tree from, tree to, basic_block bb)
 218 {
 219   adjust_info ai;
 220
 221   if (MAY_HAVE_DEBUG_STMTS
 222       && TREE_CODE (from) == SSA_NAME
 223       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 224       && ! virtual_operand_p (from))
 225     {
 226       ai.from = from;
 227       ai.to = to;
 228       ai.bb = bb;
 229
 230       if (adjust_vec.exists ())
 231         adjust_vec.safe_push (ai);
 232       else
 233         adjust_debug_stmts_now (&ai);
 234     }
 235 }
 236
 237 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 238    to adjust any debug stmts that referenced the old phi arg,
 239    presumably non-loop-closed references left over from other
 240    transformations.  */
 241
 242 static void
 243 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 244 {
 245   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 246
 247   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 248
 249   if (MAY_HAVE_DEBUG_STMTS)
 250     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 251                         gimple_bb (update_phi));
 252 }
 253
 254
 255 /* Update PHI nodes for a guard of the LOOP.
 256
 257    Input:
 258    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 259         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 260         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 261         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 262         We denote this bb NEW_MERGE_BB because before the guard code was added
 263         it had a single predecessor (the LOOP header), and now it became a merge
 264         point of two paths - the path that ends with the LOOP exit-edge, and
 265         the path that ends with GUARD_EDGE.
 266    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 267         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 268
 269    ===> The CFG before the guard-code was added:
 270         LOOP_header_bb:
 271           loop_body
 272           if (exit_loop) goto update_bb
 273           else           goto LOOP_header_bb
 274         update_bb:
 275
 276    ==> The CFG after the guard-code was added:
 277         guard_bb:
 278           if (LOOP_guard_condition) goto new_merge_bb
 279           else                      goto LOOP_header_bb
 280         LOOP_header_bb:
 281           loop_body
 282           if (exit_loop_condition) goto new_merge_bb
 283           else                     goto LOOP_header_bb
 284         new_merge_bb:
 285           goto update_bb
 286         update_bb:
 287
 288    ==> The CFG after this function:
 289         guard_bb:
 290           if (LOOP_guard_condition) goto new_merge_bb
 291           else                      goto LOOP_header_bb
 292         LOOP_header_bb:
 293           loop_body
 294           if (exit_loop_condition) goto new_exit_bb
 295           else                     goto LOOP_header_bb
 296         new_exit_bb:
 297         new_merge_bb:
 298           goto update_bb
 299         update_bb:
 300
 301    This function:
 302    1. creates and updates the relevant phi nodes to account for the new
 303       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 304       1.1. Create phi nodes at NEW_MERGE_BB.
 305       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 306            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 307    2. preserves loop-closed-ssa-form by creating the required phi nodes
 308       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 309
 310    There are two flavors to this function:
 311
 312    slpeel_update_phi_nodes_for_guard1:
 313      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 314      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 315      for variables that have phis in the loop header.
 316
 317    slpeel_update_phi_nodes_for_guard2:
 318      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 319      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 320      for variables that have phis in the loop exit.
 321
 322    I.E., the overall structure is:
 323
 324         loop1_preheader_bb:
 325                 guard1 (goto loop1/merge1_bb)
 326         loop1
 327         loop1_exit_bb:
 328                 guard2 (goto merge1_bb/merge2_bb)
 329         merge1_bb
 330         loop2
 331         loop2_exit_bb
 332         merge2_bb
 333         next_bb
 334
 335    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 336    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 337    that have phis in loop1->header).
 338
 339    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 340    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 341    that have phis in next_bb). It also adds some of these phis to
 342    loop1_exit_bb.
 343
 344    slpeel_update_phi_nodes_for_guard1 is always called before
 345    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 346    to create correct data-flow and loop-closed-ssa-form.
 347
 348    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 349    that change between iterations of a loop (and therefore have a phi-node
 350    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 351    phis for variables that are used out of the loop (and therefore have
 352    loop-closed exit phis). Some variables may be both updated between
 353    iterations and used after the loop. This is why in loop1_exit_bb we
 354    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 355    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 356
 357    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 358      an original loop. i.e., we have:
 359
 360            orig_loop
 361            guard_bb (goto LOOP/new_merge)
 362            new_loop <-- LOOP
 363            new_exit
 364            new_merge
 365            next_bb
 366
 367      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 368      have:
 369
 370            new_loop
 371            guard_bb (goto LOOP/new_merge)
 372            orig_loop <-- LOOP
 373            new_exit
 374            new_merge
 375            next_bb
 376
 377      The SSA names defined in the original loop have a current
 378      reaching definition that that records the corresponding new
 379      ssa-name used in the new duplicated loop copy.
 380   */
 381
 382 /* Function slpeel_update_phi_nodes_for_guard1
 383
 384    Input:
 385    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 386    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 387             information.
 388
 389    In the context of the overall structure, we have:
 390
 391         loop1_preheader_bb:
 392                 guard1 (goto loop1/merge1_bb)
 393 LOOP->  loop1
 394         loop1_exit_bb:
 395                 guard2 (goto merge1_bb/merge2_bb)
 396         merge1_bb
 397         loop2
 398         loop2_exit_bb
 399         merge2_bb
 400         next_bb
 401
 402    For each name updated between loop iterations (i.e - for each name that has
 403    an entry (loop-header) phi in LOOP) we create a new phi in:
 404    1. merge1_bb (to account for the edge from guard1)
 405    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 406 */
 407
 408 static void
 409 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 410                                     bool is_new_loop, basic_block *new_exit_bb)
 411 {
 412   gphi *orig_phi, *new_phi;
 413   gphi *update_phi, *update_phi2;
 414   tree guard_arg, loop_arg;
 415   basic_block new_merge_bb = guard_edge->dest;
 416   edge e = EDGE_SUCC (new_merge_bb, 0);
 417   basic_block update_bb = e->dest;
 418   basic_block orig_bb = loop->header;
 419   edge new_exit_e;
 420   tree current_new_name;
 421   gphi_iterator gsi_orig, gsi_update;
 422
 423   /* Create new bb between loop and new_merge_bb.  */
 424   *new_exit_bb = split_edge (single_exit (loop));
 425
 426   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 427
 428   for (gsi_orig = gsi_start_phis (orig_bb),
 429        gsi_update = gsi_start_phis (update_bb);
 430        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 431        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 432     {
 433       source_location loop_locus, guard_locus;
 434       tree new_res;
 435       orig_phi = gsi_orig.phi ();
 436       update_phi = gsi_update.phi ();
 437
 438       /** 1. Handle new-merge-point phis  **/
 439
 440       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 441       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 442       new_phi = create_phi_node (new_res, new_merge_bb);
 443
 444       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 445             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 446       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 447       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 448                                                       EDGE_SUCC (loop->latch,
 449                                                                  0));
 450       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 451       guard_locus
 452         = gimple_phi_arg_location_from_edge (orig_phi,
 453                                              loop_preheader_edge (loop));
 454
 455       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 456       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 457
 458       /* 1.3. Update phi in successor block.  */
 459       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 460                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 461       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 462       update_phi2 = new_phi;
 463
 464
 465       /** 2. Handle loop-closed-ssa-form phis  **/
 466
 467       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 468         continue;
 469
 470       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 471       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 472       new_phi = create_phi_node (new_res, *new_exit_bb);
 473
 474       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 475       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 476
 477       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 478       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 479       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 480                                   PHI_RESULT (new_phi));
 481
 482       /* 2.4. Record the newly created name with set_current_def.
 483          We want to find a name such that
 484                 name = get_current_def (orig_loop_name)
 485          and to set its current definition as follows:
 486                 set_current_def (name, new_phi_name)
 487
 488          If LOOP is a new loop then loop_arg is already the name we're
 489          looking for. If LOOP is the original loop, then loop_arg is
 490          the orig_loop_name and the relevant name is recorded in its
 491          current reaching definition.  */
 492       if (is_new_loop)
 493         current_new_name = loop_arg;
 494       else
 495         {
 496           current_new_name = get_current_def (loop_arg);
 497           /* current_def is not available only if the variable does not
 498              change inside the loop, in which case we also don't care
 499              about recording a current_def for it because we won't be
 500              trying to create loop-exit-phis for it.  */
 501           if (!current_new_name)
 502             continue;
 503         }
 504       tree new_name = get_current_def (current_new_name);
 505       /* Because of peeled_chrec optimization it is possible that we have
 506          set this earlier.  Verify the PHI has the same value.  */
 507       if (new_name)
 508         {
 509           gimple phi = SSA_NAME_DEF_STMT (new_name);
 510           gcc_assert (gimple_code (phi) == GIMPLE_PHI
 511                       && gimple_bb (phi) == *new_exit_bb
 512                       && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
 513                           == loop_arg));
 514           continue;
 515         }
 516
 517       set_current_def (current_new_name, PHI_RESULT (new_phi));
 518     }
 519 }
 520
 521
 522 /* Function slpeel_update_phi_nodes_for_guard2
 523
 524    Input:
 525    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 526
 527    In the context of the overall structure, we have:
 528
 529         loop1_preheader_bb:
 530                 guard1 (goto loop1/merge1_bb)
 531         loop1
 532         loop1_exit_bb:
 533                 guard2 (goto merge1_bb/merge2_bb)
 534         merge1_bb
 535 LOOP->  loop2
 536         loop2_exit_bb
 537         merge2_bb
 538         next_bb
 539
 540    For each name used out side the loop (i.e - for each name that has an exit
 541    phi in next_bb) we create a new phi in:
 542    1. merge2_bb (to account for the edge from guard_bb)
 543    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 544    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 545       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 546 */
 547
 548 static void
 549 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 550                                     bool is_new_loop, basic_block *new_exit_bb)
 551 {
 552   gphi *orig_phi, *new_phi;
 553   gphi *update_phi, *update_phi2;
 554   tree guard_arg, loop_arg;
 555   basic_block new_merge_bb = guard_edge->dest;
 556   edge e = EDGE_SUCC (new_merge_bb, 0);
 557   basic_block update_bb = e->dest;
 558   edge new_exit_e;
 559   tree orig_def, orig_def_new_name;
 560   tree new_name, new_name2;
 561   tree arg;
 562   gphi_iterator gsi;
 563
 564   /* Create new bb between loop and new_merge_bb.  */
 565   *new_exit_bb = split_edge (single_exit (loop));
 566
 567   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 568
 569   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 570     {
 571       tree new_res;
 572       update_phi = gsi.phi ();
 573       orig_phi = update_phi;
 574       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 575       /* This loop-closed-phi actually doesn't represent a use
 576          out of the loop - the phi arg is a constant.  */
 577       if (TREE_CODE (orig_def) != SSA_NAME)
 578         continue;
 579       orig_def_new_name = get_current_def (orig_def);
 580       arg = NULL_TREE;
 581
 582       /** 1. Handle new-merge-point phis  **/
 583
 584       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 585       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 586       new_phi = create_phi_node (new_res, new_merge_bb);
 587
 588       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 589             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 590       new_name = orig_def;
 591       new_name2 = NULL_TREE;
 592       if (orig_def_new_name)
 593         {
 594           new_name = orig_def_new_name;
 595           /* Some variables have both loop-entry-phis and loop-exit-phis.
 596              Such variables were given yet newer names by phis placed in
 597              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 598              new_name2 = get_current_def (get_current_def (orig_name)).  */
 599           new_name2 = get_current_def (new_name);
 600         }
 601
 602       if (is_new_loop)
 603         {
 604           guard_arg = orig_def;
 605           loop_arg = new_name;
 606         }
 607       else
 608         {
 609           guard_arg = new_name;
 610           loop_arg = orig_def;
 611         }
 612       if (new_name2)
 613         guard_arg = new_name2;
 614
 615       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 616       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 617
 618       /* 1.3. Update phi in successor block.  */
 619       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 620       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 621       update_phi2 = new_phi;
 622
 623
 624       /** 2. Handle loop-closed-ssa-form phis  **/
 625
 626       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 627       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 628       new_phi = create_phi_node (new_res, *new_exit_bb);
 629
 630       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 631       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 632
 633       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 634       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 635       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 636                                   PHI_RESULT (new_phi));
 637
 638
 639       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 640
 641       /* 3.1. Find the relevant names that need an exit-phi in
 642          GUARD_BB, i.e. names for which
 643          slpeel_update_phi_nodes_for_guard1 had not already created a
 644          phi node. This is the case for names that are used outside
 645          the loop (and therefore need an exit phi) but are not updated
 646          across loop iterations (and therefore don't have a
 647          loop-header-phi).
 648
 649          slpeel_update_phi_nodes_for_guard1 is responsible for
 650          creating loop-exit phis in GUARD_BB for names that have a
 651          loop-header-phi.  When such a phi is created we also record
 652          the new name in its current definition.  If this new name
 653          exists, then guard_arg was set to this new name (see 1.2
 654          above).  Therefore, if guard_arg is not this new name, this
 655          is an indication that an exit-phi in GUARD_BB was not yet
 656          created, so we take care of it here.  */
 657       if (guard_arg == new_name2)
 658         continue;
 659       arg = guard_arg;
 660
 661       /* 3.2. Generate new phi node in GUARD_BB:  */
 662       new_res = copy_ssa_name (PHI_RESULT (orig_phi));
 663       new_phi = create_phi_node (new_res, guard_edge->src);
 664
 665       /* 3.3. GUARD_BB has one incoming edge:  */
 666       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 667       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 668                    UNKNOWN_LOCATION);
 669
 670       /* 3.4. Update phi in successor of GUARD_BB:  */
 671       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 672                                                                 == guard_arg);
 673       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 674                                   PHI_RESULT (new_phi));
 675     }
 676 }
 677
 678
 679 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 680    that starts at zero, increases by one and its limit is NITERS.
 681
 682    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 683
 684 void
 685 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 686 {
 687   tree indx_before_incr, indx_after_incr;
 688   gcond *cond_stmt;
 689   gcond *orig_cond;
 690   edge exit_edge = single_exit (loop);
 691   gimple_stmt_iterator loop_cond_gsi;
 692   gimple_stmt_iterator incr_gsi;
 693   bool insert_after;
 694   tree init = build_int_cst (TREE_TYPE (niters), 0);
 695   tree step = build_int_cst (TREE_TYPE (niters), 1);
 696   source_location loop_loc;
 697   enum tree_code code;
 698
 699   orig_cond = get_loop_exit_condition (loop);
 700   gcc_assert (orig_cond);
 701   loop_cond_gsi = gsi_for_stmt (orig_cond);
 702
 703   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 704   create_iv (init, step, NULL_TREE, loop,
 705              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 706
 707   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 708                                               true, NULL_TREE, true,
 709                                               GSI_SAME_STMT);
 710   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 711                                      true, GSI_SAME_STMT);
 712
 713   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 714   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 715                                  NULL_TREE);
 716
 717   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 718
 719   /* Remove old loop exit test:  */
 720   gsi_remove (&loop_cond_gsi, true);
 721   free_stmt_vec_info (orig_cond);
 722
 723   loop_loc = find_loop_location (loop);
 724   if (dump_enabled_p ())
 725     {
 726       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 727         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 728                      LOCATION_LINE (loop_loc));
 729       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 730       dump_printf (MSG_NOTE, "\n");
 731     }
 732   loop->nb_iterations = niters;
 733 }
 734
 735 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 736    For all PHI arguments in FROM->dest and TO->dest from those
 737    edges ensure that TO->dest PHI arguments have current_def
 738    to that in from.  */
 739
 740 static void
 741 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 742 {
 743   gimple_stmt_iterator gsi_from, gsi_to;
 744
 745   for (gsi_from = gsi_start_phis (from->dest),
 746        gsi_to = gsi_start_phis (to->dest);
 747        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
 748        gsi_next (&gsi_from), gsi_next (&gsi_to))
 749     {
 750       gimple from_phi = gsi_stmt (gsi_from);
 751       gimple to_phi = gsi_stmt (gsi_to);
 752       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 753       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 754       if (TREE_CODE (from_arg) == SSA_NAME
 755           && TREE_CODE (to_arg) == SSA_NAME
 756           && get_current_def (to_arg) == NULL_TREE)
 757         set_current_def (to_arg, get_current_def (from_arg));
 758     }
 759 }
 760
 761
 762 /* Given LOOP this function generates a new copy of it and puts it
 763    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 764    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 765    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 766    entry or exit of LOOP.  */
 767
 768 struct loop *
 769 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 770                                         struct loop *scalar_loop, edge e)
 771 {
 772   struct loop *new_loop;
 773   basic_block *new_bbs, *bbs;
 774   bool at_exit;
 775   bool was_imm_dom;
 776   basic_block exit_dest;
 777   edge exit, new_exit;
 778
 779   exit = single_exit (loop);
 780   at_exit = (e == exit);
 781   if (!at_exit && e != loop_preheader_edge (loop))
 782     return NULL;
 783
 784   if (scalar_loop == NULL)
 785     scalar_loop = loop;
 786
 787   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 788   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 789
 790   /* Check whether duplication is possible.  */
 791   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
 792     {
 793       free (bbs);
 794       return NULL;
 795     }
 796
 797   /* Generate new loop structure.  */
 798   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 799   duplicate_subloops (scalar_loop, new_loop);
 800
 801   exit_dest = exit->dest;
 802   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 803                                           exit_dest) == loop->header ?
 804                  true : false);
 805
 806   /* Also copy the pre-header, this avoids jumping through hoops to
 807      duplicate the loop entry PHI arguments.  Create an empty
 808      pre-header unconditionally for this.  */
 809   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 810   edge entry_e = single_pred_edge (preheader);
 811   bbs[scalar_loop->num_nodes] = preheader;
 812   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 813
 814   exit = single_exit (scalar_loop);
 815   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 816             &exit, 1, &new_exit, NULL,
 817             e->src, true);
 818   exit = single_exit (loop);
 819   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 820
 821   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 822
 823   if (scalar_loop != loop)
 824     {
 825       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 826          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 827          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 828          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 829          header) to have current_def set, so copy them over.  */
 830       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 831                                                 exit);
 832       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 833                                                            0),
 834                                                 EDGE_SUCC (loop->latch, 0));
 835     }
 836
 837   if (at_exit) /* Add the loop copy at exit.  */
 838     {
 839       if (scalar_loop != loop)
 840         {
 841           gphi_iterator gsi;
 842           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 843
 844           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 845                gsi_next (&gsi))
 846             {
 847               gphi *phi = gsi.phi ();
 848               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 849               location_t orig_locus
 850                 = gimple_phi_arg_location_from_edge (phi, e);
 851
 852               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 853             }
 854         }
 855       redirect_edge_and_branch_force (e, new_preheader);
 856       flush_pending_stmts (e);
 857       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 858       if (was_imm_dom)
 859         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 860
 861       /* And remove the non-necessary forwarder again.  Keep the other
 862          one so we have a proper pre-header for the loop at the exit edge.  */
 863       redirect_edge_pred (single_succ_edge (preheader),
 864                           single_pred (preheader));
 865       delete_basic_block (preheader);
 866       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 867                                loop_preheader_edge (scalar_loop)->src);
 868     }
 869   else /* Add the copy at entry.  */
 870     {
 871       if (scalar_loop != loop)
 872         {
 873           /* Remove the non-necessary forwarder of scalar_loop again.  */
 874           redirect_edge_pred (single_succ_edge (preheader),
 875                               single_pred (preheader));
 876           delete_basic_block (preheader);
 877           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 878                                    loop_preheader_edge (scalar_loop)->src);
 879           preheader = split_edge (loop_preheader_edge (loop));
 880           entry_e = single_pred_edge (preheader);
 881         }
 882
 883       redirect_edge_and_branch_force (entry_e, new_preheader);
 884       flush_pending_stmts (entry_e);
 885       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 886
 887       redirect_edge_and_branch_force (new_exit, preheader);
 888       flush_pending_stmts (new_exit);
 889       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 890
 891       /* And remove the non-necessary forwarder again.  Keep the other
 892          one so we have a proper pre-header for the loop at the exit edge.  */
 893       redirect_edge_pred (single_succ_edge (new_preheader),
 894                           single_pred (new_preheader));
 895       delete_basic_block (new_preheader);
 896       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 897                                loop_preheader_edge (new_loop)->src);
 898     }
 899
 900   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 901     rename_variables_in_bb (new_bbs[i]);
 902
 903   if (scalar_loop != loop)
 904     {
 905       /* Update new_loop->header PHIs, so that on the preheader
 906          edge they are the ones from loop rather than scalar_loop.  */
 907       gphi_iterator gsi_orig, gsi_new;
 908       edge orig_e = loop_preheader_edge (loop);
 909       edge new_e = loop_preheader_edge (new_loop);
 910
 911       for (gsi_orig = gsi_start_phis (loop->header),
 912            gsi_new = gsi_start_phis (new_loop->header);
 913            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 914            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 915         {
 916           gphi *orig_phi = gsi_orig.phi ();
 917           gphi *new_phi = gsi_new.phi ();
 918           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 919           location_t orig_locus
 920             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 921
 922           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 923         }
 924     }
 925
 926   free (new_bbs);
 927   free (bbs);
 928
 929 #ifdef ENABLE_CHECKING
 930   verify_dominators (CDI_DOMINATORS);
 931 #endif
 932
 933   return new_loop;
 934 }
 935
 936
 937 /* Given the condition statement COND, put it as the last statement
 938    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 939    Assumes that this is the single exit of the guarded loop.
 940    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 941
 942 static edge
 943 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 944                        gimple_seq cond_expr_stmt_list,
 945                        basic_block exit_bb, basic_block dom_bb,
 946                        int probability)
 947 {
 948   gimple_stmt_iterator gsi;
 949   edge new_e, enter_e;
 950   gcond *cond_stmt;
 951   gimple_seq gimplify_stmt_list = NULL;
 952
 953   enter_e = EDGE_SUCC (guard_bb, 0);
 954   enter_e->flags &= ~EDGE_FALLTHRU;
 955   enter_e->flags |= EDGE_FALSE_VALUE;
 956   gsi = gsi_last_bb (guard_bb);
 957
 958   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 959                                  NULL_TREE);
 960   if (gimplify_stmt_list)
 961     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 962   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 963   if (cond_expr_stmt_list)
 964     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 965
 966   gsi = gsi_last_bb (guard_bb);
 967   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 968
 969   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 970   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 971
 972   new_e->count = guard_bb->count;
 973   new_e->probability = probability;
 974   new_e->count = apply_probability (enter_e->count, probability);
 975   enter_e->count -= new_e->count;
 976   enter_e->probability = inverse_probability (probability);
 977   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 978   return new_e;
 979 }
 980
 981
 982 /* This function verifies that the following restrictions apply to LOOP:
 983    (1) it is innermost
 984    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 985    (3) it is single entry, single exit
 986    (4) its exit condition is the last stmt in the header
 987    (5) E is the entry/exit edge of LOOP.
 988  */
 989
 990 bool
 991 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 992 {
 993   edge exit_e = single_exit (loop);
 994   edge entry_e = loop_preheader_edge (loop);
 995   gcond *orig_cond = get_loop_exit_condition (loop);
 996   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 997
 998   if (loop->inner
 999       /* All loops have an outer scope; the only case loop->outer is NULL is for
1000          the function itself.  */
1001       || !loop_outer (loop)
1002       || loop->num_nodes != 2
1003       || !empty_block_p (loop->latch)
1004       || !single_exit (loop)
1005       /* Verify that new loop exit condition can be trivially modified.  */
1006       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1007       || (e != exit_e && e != entry_e))
1008     return false;
1009
1010   return true;
1011 }
1012
1013 #ifdef ENABLE_CHECKING
1014 static void
1015 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1016                                  struct loop *second_loop)
1017 {
1018   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1019   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1020   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1021
1022   /* A guard that controls whether the second_loop is to be executed or skipped
1023      is placed in first_loop->exit.  first_loop->exit therefore has two
1024      successors - one is the preheader of second_loop, and the other is a bb
1025      after second_loop.
1026    */
1027   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1028
1029   /* 1. Verify that one of the successors of first_loop->exit is the preheader
1030         of second_loop.  */
1031
1032   /* The preheader of new_loop is expected to have two predecessors:
1033      first_loop->exit and the block that precedes first_loop.  */
1034
1035   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1036               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1037                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1038                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1039                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1040
1041   /* Verify that the other successor of first_loop->exit is after the
1042      second_loop.  */
1043   /* TODO */
1044 }
1045 #endif
1046
1047 /* If the run time cost model check determines that vectorization is
1048    not profitable and hence scalar loop should be generated then set
1049    FIRST_NITERS to prologue peeled iterations. This will allow all the
1050    iterations to be executed in the prologue peeled scalar loop.  */
1051
1052 static void
1053 set_prologue_iterations (basic_block bb_before_first_loop,
1054                          tree *first_niters,
1055                          struct loop *loop,
1056                          unsigned int th,
1057                          int probability)
1058 {
1059   edge e;
1060   basic_block cond_bb, then_bb;
1061   tree var, prologue_after_cost_adjust_name;
1062   gimple_stmt_iterator gsi;
1063   gphi *newphi;
1064   edge e_true, e_false, e_fallthru;
1065   gcond *cond_stmt;
1066   gimple_seq stmts = NULL;
1067   tree cost_pre_condition = NULL_TREE;
1068   tree scalar_loop_iters =
1069     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1070
1071   e = single_pred_edge (bb_before_first_loop);
1072   cond_bb = split_edge (e);
1073
1074   e = single_pred_edge (bb_before_first_loop);
1075   then_bb = split_edge (e);
1076   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1077
1078   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1079                                    EDGE_FALSE_VALUE);
1080   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1081
1082   e_true = EDGE_PRED (then_bb, 0);
1083   e_true->flags &= ~EDGE_FALLTHRU;
1084   e_true->flags |= EDGE_TRUE_VALUE;
1085
1086   e_true->probability = probability;
1087   e_false->probability = inverse_probability (probability);
1088   e_true->count = apply_probability (cond_bb->count, probability);
1089   e_false->count = cond_bb->count - e_true->count;
1090   then_bb->frequency = EDGE_FREQUENCY (e_true);
1091   then_bb->count = e_true->count;
1092
1093   e_fallthru = EDGE_SUCC (then_bb, 0);
1094   e_fallthru->count = then_bb->count;
1095
1096   gsi = gsi_last_bb (cond_bb);
1097   cost_pre_condition =
1098     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1099                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1100   cost_pre_condition =
1101     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1102                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
1103   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1104                                            NULL_TREE, NULL_TREE);
1105   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1106
1107   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1108                         "prologue_after_cost_adjust");
1109   prologue_after_cost_adjust_name =
1110     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1111
1112   gsi = gsi_last_bb (then_bb);
1113   if (stmts)
1114     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1115
1116   newphi = create_phi_node (var, bb_before_first_loop);
1117   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1118                UNKNOWN_LOCATION);
1119   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1120
1121   *first_niters = PHI_RESULT (newphi);
1122 }
1123
1124 /* Function slpeel_tree_peel_loop_to_edge.
1125
1126    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1127    that is placed on the entry (exit) edge E of LOOP. After this transformation
1128    we have two loops one after the other - first-loop iterates FIRST_NITERS
1129    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1130    If the cost model indicates that it is profitable to emit a scalar
1131    loop instead of the vector one, then the prolog (epilog) loop will iterate
1132    for the entire unchanged scalar iterations of the loop.
1133
1134    Input:
1135    - LOOP: the loop to be peeled.
1136    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1137         should be copied.
1138    - E: the exit or entry edge of LOOP.
1139         If it is the entry edge, we peel the first iterations of LOOP. In this
1140         case first-loop is LOOP, and second-loop is the newly created loop.
1141         If it is the exit edge, we peel the last iterations of LOOP. In this
1142         case, first-loop is the newly created loop, and second-loop is LOOP.
1143    - NITERS: the number of iterations that LOOP iterates.
1144    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1145    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1146         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1147         is false, the caller of this function may want to take care of this
1148         (this can be useful if we don't want new stmts added to first-loop).
1149    - TH: cost model profitability threshold of iterations for vectorization.
1150    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1151                           during versioning and hence needs to occur during
1152                           prologue generation or whether cost model check
1153                           has not occurred during prologue generation and hence
1154                           needs to occur during epilogue generation.
1155    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1156    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1157
1158
1159    Output:
1160    The function returns a pointer to the new loop-copy, or NULL if it failed
1161    to perform the transformation.
1162
1163    The function generates two if-then-else guards: one before the first loop,
1164    and the other before the second loop:
1165    The first guard is:
1166      if (FIRST_NITERS == 0) then skip the first loop,
1167      and go directly to the second loop.
1168    The second guard is:
1169      if (FIRST_NITERS == NITERS) then skip the second loop.
1170
1171    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1172    then the generated condition is combined with COND_EXPR and the
1173    statements in COND_EXPR_STMT_LIST are emitted together with it.
1174
1175    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1176    FORNOW the resulting code will not be in loop-closed-ssa form.
1177 */
1178
1179 static struct loop *
1180 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1181                                edge e, tree *first_niters,
1182                                tree niters, bool update_first_loop_count,
1183                                unsigned int th, bool check_profitability,
1184                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1185                                int bound1, int bound2)
1186 {
1187   struct loop *new_loop = NULL, *first_loop, *second_loop;
1188   edge skip_e;
1189   tree pre_condition = NULL_TREE;
1190   basic_block bb_before_second_loop, bb_after_second_loop;
1191   basic_block bb_before_first_loop;
1192   basic_block bb_between_loops;
1193   basic_block new_exit_bb;
1194   gphi_iterator gsi;
1195   edge exit_e = single_exit (loop);
1196   source_location loop_loc;
1197   /* There are many aspects to how likely the first loop is going to be executed.
1198      Without histogram we can't really do good job.  Simply set it to
1199      2/3, so the first loop is not reordered to the end of function and
1200      the hot path through stays short.  */
1201   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1202   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1203   int probability_of_second_loop;
1204
1205   if (!slpeel_can_duplicate_loop_p (loop, e))
1206     return NULL;
1207
1208   /* We might have a queued need to update virtual SSA form.  As we
1209      delete the update SSA machinery below after doing a regular
1210      incremental SSA update during loop copying make sure we don't
1211      lose that fact.
1212      ???  Needing to update virtual SSA form by renaming is unfortunate
1213      but not all of the vectorizer code inserting new loads / stores
1214      properly assigns virtual operands to those statements.  */
1215   update_ssa (TODO_update_ssa_only_virtuals);
1216
1217   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1218      in the exit bb and rename all the uses after the loop.  This simplifies
1219      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1220      (but normally loop closed SSA form doesn't require virtual PHIs to be
1221      in the same form).  Doing this early simplifies the checking what
1222      uses should be renamed.  */
1223   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1224     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1225       {
1226         gphi *phi = gsi.phi ();
1227         for (gsi = gsi_start_phis (exit_e->dest);
1228              !gsi_end_p (gsi); gsi_next (&gsi))
1229           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1230             break;
1231         if (gsi_end_p (gsi))
1232           {
1233             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1234             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
1235             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1236             imm_use_iterator imm_iter;
1237             gimple stmt;
1238             use_operand_p use_p;
1239
1240             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1241             gimple_phi_set_result (new_phi, new_vop);
1242             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1243               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1244                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1245                   SET_USE (use_p, new_vop);
1246           }
1247         break;
1248       }
1249
1250   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1251         Resulting CFG would be:
1252
1253         first_loop:
1254         do {
1255         } while ...
1256
1257         second_loop:
1258         do {
1259         } while ...
1260
1261         orig_exit_bb:
1262    */
1263
1264   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1265                                                            e)))
1266     {
1267       loop_loc = find_loop_location (loop);
1268       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1269                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1270       return NULL;
1271     }
1272
1273   if (MAY_HAVE_DEBUG_STMTS)
1274     {
1275       gcc_assert (!adjust_vec.exists ());
1276       adjust_vec.create (32);
1277     }
1278
1279   if (e == exit_e)
1280     {
1281       /* NEW_LOOP was placed after LOOP.  */
1282       first_loop = loop;
1283       second_loop = new_loop;
1284     }
1285   else
1286     {
1287       /* NEW_LOOP was placed before LOOP.  */
1288       first_loop = new_loop;
1289       second_loop = loop;
1290     }
1291
1292   /* 2.  Add the guard code in one of the following ways:
1293
1294      2.a Add the guard that controls whether the first loop is executed.
1295          This occurs when this function is invoked for prologue or epilogue
1296          generation and when the cost model check can be done at compile time.
1297
1298          Resulting CFG would be:
1299
1300          bb_before_first_loop:
1301          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1302                                 GOTO first-loop
1303
1304          first_loop:
1305          do {
1306          } while ...
1307
1308          bb_before_second_loop:
1309
1310          second_loop:
1311          do {
1312          } while ...
1313
1314          orig_exit_bb:
1315
1316      2.b Add the cost model check that allows the prologue
1317          to iterate for the entire unchanged scalar
1318          iterations of the loop in the event that the cost
1319          model indicates that the scalar loop is more
1320          profitable than the vector one. This occurs when
1321          this function is invoked for prologue generation
1322          and the cost model check needs to be done at run
1323          time.
1324
1325          Resulting CFG after prologue peeling would be:
1326
1327          if (scalar_loop_iterations <= th)
1328            FIRST_NITERS = scalar_loop_iterations
1329
1330          bb_before_first_loop:
1331          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1332                                 GOTO first-loop
1333
1334          first_loop:
1335          do {
1336          } while ...
1337
1338          bb_before_second_loop:
1339
1340          second_loop:
1341          do {
1342          } while ...
1343
1344          orig_exit_bb:
1345
1346      2.c Add the cost model check that allows the epilogue
1347          to iterate for the entire unchanged scalar
1348          iterations of the loop in the event that the cost
1349          model indicates that the scalar loop is more
1350          profitable than the vector one. This occurs when
1351          this function is invoked for epilogue generation
1352          and the cost model check needs to be done at run
1353          time.  This check is combined with any pre-existing
1354          check in COND_EXPR to avoid versioning.
1355
1356          Resulting CFG after prologue peeling would be:
1357
1358          bb_before_first_loop:
1359          if ((scalar_loop_iterations <= th)
1360              ||
1361              FIRST_NITERS == 0) GOTO bb_before_second_loop
1362                                 GOTO first-loop
1363
1364          first_loop:
1365          do {
1366          } while ...
1367
1368          bb_before_second_loop:
1369
1370          second_loop:
1371          do {
1372          } while ...
1373
1374          orig_exit_bb:
1375   */
1376
1377   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1378   /* Loop copying insterted a forwarder block for us here.  */
1379   bb_before_second_loop = single_exit (first_loop)->dest;
1380
1381   probability_of_second_loop = (inverse_probability (first_guard_probability)
1382                                 + combine_probabilities (second_guard_probability,
1383                                                          first_guard_probability));
1384   /* Theoretically preheader edge of first loop and exit edge should have
1385      same frequencies.  Loop exit probablities are however easy to get wrong.
1386      It is safer to copy value from original loop entry.  */
1387   bb_before_second_loop->frequency
1388      = combine_probabilities (bb_before_first_loop->frequency,
1389                               probability_of_second_loop);
1390   bb_before_second_loop->count
1391      = apply_probability (bb_before_first_loop->count,
1392                           probability_of_second_loop);
1393   single_succ_edge (bb_before_second_loop)->count
1394      = bb_before_second_loop->count;
1395
1396   /* Epilogue peeling.  */
1397   if (!update_first_loop_count)
1398     {
1399       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1400       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1401       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1402       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1403         limit = limit + 1;
1404       if (check_profitability
1405           && th > limit)
1406         limit = th;
1407       pre_condition =
1408         fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1409                      build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1410       if (cond_expr)
1411         {
1412           pre_condition =
1413             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1414                          pre_condition,
1415                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1416                                       cond_expr));
1417         }
1418     }
1419
1420   /* Prologue peeling.  */
1421   else
1422     {
1423       if (check_profitability)
1424         set_prologue_iterations (bb_before_first_loop, first_niters,
1425                                  loop, th, first_guard_probability);
1426
1427       pre_condition =
1428         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1429                      build_int_cst (TREE_TYPE (*first_niters), 0));
1430     }
1431
1432   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1433                                   cond_expr_stmt_list,
1434                                   bb_before_second_loop, bb_before_first_loop,
1435                                   inverse_probability (first_guard_probability));
1436   scale_loop_profile (first_loop, first_guard_probability,
1437                       check_profitability && (int)th > bound1 ? th : bound1);
1438   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1439                                       first_loop == new_loop,
1440                                       &new_exit_bb);
1441
1442
1443   /* 3. Add the guard that controls whether the second loop is executed.
1444         Resulting CFG would be:
1445
1446         bb_before_first_loop:
1447         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1448                                GOTO first-loop
1449
1450         first_loop:
1451         do {
1452         } while ...
1453
1454         bb_between_loops:
1455         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1456                                     GOTO bb_before_second_loop
1457
1458         bb_before_second_loop:
1459
1460         second_loop:
1461         do {
1462         } while ...
1463
1464         bb_after_second_loop:
1465
1466         orig_exit_bb:
1467    */
1468
1469   bb_between_loops = new_exit_bb;
1470   bb_after_second_loop = split_edge (single_exit (second_loop));
1471
1472   pre_condition =
1473         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1474   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1475                                   bb_after_second_loop, bb_before_first_loop,
1476                                   inverse_probability (second_guard_probability));
1477   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1478   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1479                                      second_loop == new_loop, &new_exit_bb);
1480
1481   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1482    */
1483   if (update_first_loop_count)
1484     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1485
1486   delete_update_ssa ();
1487
1488   adjust_vec_debug_stmts ();
1489
1490   return new_loop;
1491 }
1492
1493 /* Function vect_get_loop_location.
1494
1495    Extract the location of the loop in the source code.
1496    If the loop is not well formed for vectorization, an estimated
1497    location is calculated.
1498    Return the loop location if succeed and NULL if not.  */
1499
1500 source_location
1501 find_loop_location (struct loop *loop)
1502 {
1503   gimple stmt = NULL;
1504   basic_block bb;
1505   gimple_stmt_iterator si;
1506
1507   if (!loop)
1508     return UNKNOWN_LOCATION;
1509
1510   stmt = get_loop_exit_condition (loop);
1511
1512   if (stmt
1513       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1514     return gimple_location (stmt);
1515
1516   /* If we got here the loop is probably not "well formed",
1517      try to estimate the loop location */
1518
1519   if (!loop->header)
1520     return UNKNOWN_LOCATION;
1521
1522   bb = loop->header;
1523
1524   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1525     {
1526       stmt = gsi_stmt (si);
1527       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1528         return gimple_location (stmt);
1529     }
1530
1531   return UNKNOWN_LOCATION;
1532 }
1533
1534
1535 /* Function vect_can_advance_ivs_p
1536
1537    In case the number of iterations that LOOP iterates is unknown at compile
1538    time, an epilog loop will be generated, and the loop induction variables
1539    (IVs) will be "advanced" to the value they are supposed to take just before
1540    the epilog loop.  Here we check that the access function of the loop IVs
1541    and the expression that represents the loop bound are simple enough.
1542    These restrictions will be relaxed in the future.  */
1543
1544 bool
1545 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1546 {
1547   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1548   basic_block bb = loop->header;
1549   gimple phi;
1550   gphi_iterator gsi;
1551
1552   /* Analyze phi functions of the loop header.  */
1553
1554   if (dump_enabled_p ())
1555     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1556   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1557     {
1558       tree evolution_part;
1559
1560       phi = gsi.phi ();
1561       if (dump_enabled_p ())
1562         {
1563           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1564           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1565           dump_printf (MSG_NOTE, "\n");
1566         }
1567
1568       /* Skip virtual phi's. The data dependences that are associated with
1569          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1570
1571       if (virtual_operand_p (PHI_RESULT (phi)))
1572         {
1573           if (dump_enabled_p ())
1574             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1575                              "virtual phi. skip.\n");
1576           continue;
1577         }
1578
1579       /* Skip reduction phis.  */
1580
1581       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1582         {
1583           if (dump_enabled_p ())
1584             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1585                              "reduc phi. skip.\n");
1586           continue;
1587         }
1588
1589       /* Analyze the evolution function.  */
1590
1591       evolution_part
1592         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1593       if (evolution_part == NULL_TREE)
1594         {
1595           if (dump_enabled_p ())
1596             dump_printf (MSG_MISSED_OPTIMIZATION,
1597                          "No access function or evolution.\n");
1598           return false;
1599         }
1600
1601       /* FORNOW: We do not transform initial conditions of IVs
1602          which evolution functions are a polynomial of degree >= 2.  */
1603
1604       if (tree_is_chrec (evolution_part))
1605         return false;
1606     }
1607
1608   return true;
1609 }
1610
1611
1612 /*   Function vect_update_ivs_after_vectorizer.
1613
1614      "Advance" the induction variables of LOOP to the value they should take
1615      after the execution of LOOP.  This is currently necessary because the
1616      vectorizer does not handle induction variables that are used after the
1617      loop.  Such a situation occurs when the last iterations of LOOP are
1618      peeled, because:
1619      1. We introduced new uses after LOOP for IVs that were not originally used
1620         after LOOP: the IVs of LOOP are now used by an epilog loop.
1621      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1622         times, whereas the loop IVs should be bumped N times.
1623
1624      Input:
1625      - LOOP - a loop that is going to be vectorized. The last few iterations
1626               of LOOP were peeled.
1627      - NITERS - the number of iterations that LOOP executes (before it is
1628                 vectorized). i.e, the number of times the ivs should be bumped.
1629      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1630                   coming out from LOOP on which there are uses of the LOOP ivs
1631                   (this is the path from LOOP->exit to epilog_loop->preheader).
1632
1633                   The new definitions of the ivs are placed in LOOP->exit.
1634                   The phi args associated with the edge UPDATE_E in the bb
1635                   UPDATE_E->dest are updated accordingly.
1636
1637      Assumption 1: Like the rest of the vectorizer, this function assumes
1638      a single loop exit that has a single predecessor.
1639
1640      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1641      organized in the same order.
1642
1643      Assumption 3: The access function of the ivs is simple enough (see
1644      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1645
1646      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1647      coming out of LOOP on which the ivs of LOOP are used (this is the path
1648      that leads to the epilog loop; other paths skip the epilog loop).  This
1649      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1650      needs to have its phis updated.
1651  */
1652
1653 static void
1654 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1655                                   edge update_e)
1656 {
1657   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1658   basic_block exit_bb = single_exit (loop)->dest;
1659   gphi *phi, *phi1;
1660   gphi_iterator gsi, gsi1;
1661   basic_block update_bb = update_e->dest;
1662
1663   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1664
1665   /* Make sure there exists a single-predecessor exit bb:  */
1666   gcc_assert (single_pred_p (exit_bb));
1667
1668   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1669        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1670        gsi_next (&gsi), gsi_next (&gsi1))
1671     {
1672       tree init_expr;
1673       tree step_expr, off;
1674       tree type;
1675       tree var, ni, ni_name;
1676       gimple_stmt_iterator last_gsi;
1677       stmt_vec_info stmt_info;
1678
1679       phi = gsi.phi ();
1680       phi1 = gsi1.phi ();
1681       if (dump_enabled_p ())
1682         {
1683           dump_printf_loc (MSG_NOTE, vect_location,
1684                            "vect_update_ivs_after_vectorizer: phi: ");
1685           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1686           dump_printf (MSG_NOTE, "\n");
1687         }
1688
1689       /* Skip virtual phi's.  */
1690       if (virtual_operand_p (PHI_RESULT (phi)))
1691         {
1692           if (dump_enabled_p ())
1693             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1694                              "virtual phi. skip.\n");
1695           continue;
1696         }
1697
1698       /* Skip reduction phis.  */
1699       stmt_info = vinfo_for_stmt (phi);
1700       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1701         {
1702           if (dump_enabled_p ())
1703             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704                              "reduc phi. skip.\n");
1705           continue;
1706         }
1707
1708       type = TREE_TYPE (gimple_phi_result (phi));
1709       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1710       step_expr = unshare_expr (step_expr);
1711
1712       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1713          of degree >= 2 or exponential.  */
1714       gcc_assert (!tree_is_chrec (step_expr));
1715
1716       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1717
1718       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1719                          fold_convert (TREE_TYPE (step_expr), niters),
1720                          step_expr);
1721       if (POINTER_TYPE_P (type))
1722         ni = fold_build_pointer_plus (init_expr, off);
1723       else
1724         ni = fold_build2 (PLUS_EXPR, type,
1725                           init_expr, fold_convert (type, off));
1726
1727       var = create_tmp_var (type, "tmp");
1728
1729       last_gsi = gsi_last_bb (exit_bb);
1730       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1731                                           true, GSI_SAME_STMT);
1732
1733       /* Fix phi expressions in the successor bb.  */
1734       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1735     }
1736 }
1737
1738 /* Function vect_do_peeling_for_loop_bound
1739
1740    Peel the last iterations of the loop represented by LOOP_VINFO.
1741    The peeled iterations form a new epilog loop.  Given that the loop now
1742    iterates NITERS times, the new epilog loop iterates
1743    NITERS % VECTORIZATION_FACTOR times.
1744
1745    The original loop will later be made to iterate
1746    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1747
1748    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1749    test.  */
1750
1751 void
1752 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1753                                 tree ni_name, tree ratio_mult_vf_name,
1754                                 unsigned int th, bool check_profitability)
1755 {
1756   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1757   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1758   struct loop *new_loop;
1759   edge update_e;
1760   basic_block preheader;
1761   int loop_num;
1762   int max_iter;
1763   tree cond_expr = NULL_TREE;
1764   gimple_seq cond_expr_stmt_list = NULL;
1765
1766   if (dump_enabled_p ())
1767     dump_printf_loc (MSG_NOTE, vect_location,
1768                      "=== vect_do_peeling_for_loop_bound ===\n");
1769
1770   initialize_original_copy_tables ();
1771
1772   loop_num  = loop->num;
1773
1774   new_loop
1775     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1776                                      &ratio_mult_vf_name, ni_name, false,
1777                                      th, check_profitability,
1778                                      cond_expr, cond_expr_stmt_list,
1779                                      0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1780   gcc_assert (new_loop);
1781   gcc_assert (loop_num == loop->num);
1782 #ifdef ENABLE_CHECKING
1783   slpeel_verify_cfg_after_peeling (loop, new_loop);
1784 #endif
1785
1786   /* A guard that controls whether the new_loop is to be executed or skipped
1787      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1788      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1789      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1790      is on the path where the LOOP IVs are used and need to be updated.  */
1791
1792   preheader = loop_preheader_edge (new_loop)->src;
1793   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1794     update_e = EDGE_PRED (preheader, 0);
1795   else
1796     update_e = EDGE_PRED (preheader, 1);
1797
1798   /* Update IVs of original loop as if they were advanced
1799      by ratio_mult_vf_name steps.  */
1800   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1801
1802   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1803      and this means N-2 loopback edge executions.
1804
1805      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1806      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1807   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1808               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1809               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1810   if (check_profitability)
1811     max_iter = MAX (max_iter, (int) th - 1);
1812   record_niter_bound (new_loop, max_iter, false, true);
1813   dump_printf (MSG_NOTE,
1814                "Setting upper bound of nb iterations for epilogue "
1815                "loop to %d\n", max_iter);
1816
1817   /* After peeling we have to reset scalar evolution analyzer.  */
1818   scev_reset ();
1819
1820   free_original_copy_tables ();
1821 }
1822
1823
1824 /* Function vect_gen_niters_for_prolog_loop
1825
1826    Set the number of iterations for the loop represented by LOOP_VINFO
1827    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1828    and the misalignment of DR - the data reference recorded in
1829    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1830    this loop, the data reference DR will refer to an aligned location.
1831
1832    The following computation is generated:
1833
1834    If the misalignment of DR is known at compile time:
1835      addr_mis = int mis = DR_MISALIGNMENT (dr);
1836    Else, compute address misalignment in bytes:
1837      addr_mis = addr & (vectype_align - 1)
1838
1839    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1840
1841    (elem_size = element type size; an element is the scalar element whose type
1842    is the inner type of the vectype)
1843
1844    When the step of the data-ref in the loop is not 1 (as in interleaved data
1845    and SLP), the number of iterations of the prolog must be divided by the step
1846    (which is equal to the size of interleaved group).
1847
1848    The above formulas assume that VF == number of elements in the vector. This
1849    may not hold when there are multiple-types in the loop.
1850    In this case, for some data-references in the loop the VF does not represent
1851    the number of elements that fit in the vector.  Therefore, instead of VF we
1852    use TYPE_VECTOR_SUBPARTS.  */
1853
1854 static tree
1855 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1856 {
1857   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1858   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1859   tree var;
1860   gimple_seq stmts;
1861   tree iters, iters_name;
1862   edge pe;
1863   basic_block new_bb;
1864   gimple dr_stmt = DR_STMT (dr);
1865   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1866   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1867   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1868   tree niters_type = TREE_TYPE (loop_niters);
1869   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1870
1871   pe = loop_preheader_edge (loop);
1872
1873   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1874     {
1875       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1876
1877       if (dump_enabled_p ())
1878         dump_printf_loc (MSG_NOTE, vect_location,
1879                          "known peeling = %d.\n", npeel);
1880
1881       iters = build_int_cst (niters_type, npeel);
1882       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1883     }
1884   else
1885     {
1886       gimple_seq new_stmts = NULL;
1887       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1888       tree offset = negative
1889           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1890       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1891                                                 &new_stmts, offset, loop);
1892       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1893       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1894       HOST_WIDE_INT elem_size =
1895                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1896       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1897       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1898       tree nelements_tree = build_int_cst (type, nelements);
1899       tree byte_misalign;
1900       tree elem_misalign;
1901
1902       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1903       gcc_assert (!new_bb);
1904
1905       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1906       byte_misalign =
1907         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1908                      vectype_align_minus_1);
1909
1910       /* Create:  elem_misalign = byte_misalign / element_size  */
1911       elem_misalign =
1912         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1913
1914       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1915       if (negative)
1916         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1917       else
1918         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1919       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1920       iters = fold_convert (niters_type, iters);
1921       *bound = nelements;
1922     }
1923
1924   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1925   /* If the loop bound is known at compile time we already verified that it is
1926      greater than vf; since the misalignment ('iters') is at most vf, there's
1927      no need to generate the MIN_EXPR in this case.  */
1928   if (TREE_CODE (loop_niters) != INTEGER_CST)
1929     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1930
1931   if (dump_enabled_p ())
1932     {
1933       dump_printf_loc (MSG_NOTE, vect_location,
1934                        "niters for prolog loop: ");
1935       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1936       dump_printf (MSG_NOTE, "\n");
1937     }
1938
1939   var = create_tmp_var (niters_type, "prolog_loop_niters");
1940   stmts = NULL;
1941   iters_name = force_gimple_operand (iters, &stmts, false, var);
1942
1943   /* Insert stmt on loop preheader edge.  */
1944   if (stmts)
1945     {
1946       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1947       gcc_assert (!new_bb);
1948     }
1949
1950   return iters_name;
1951 }
1952
1953
1954 /* Function vect_update_init_of_dr
1955
1956    NITERS iterations were peeled from LOOP.  DR represents a data reference
1957    in LOOP.  This function updates the information recorded in DR to
1958    account for the fact that the first NITERS iterations had already been
1959    executed.  Specifically, it updates the OFFSET field of DR.  */
1960
1961 static void
1962 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1963 {
1964   tree offset = DR_OFFSET (dr);
1965
1966   niters = fold_build2 (MULT_EXPR, sizetype,
1967                         fold_convert (sizetype, niters),
1968                         fold_convert (sizetype, DR_STEP (dr)));
1969   offset = fold_build2 (PLUS_EXPR, sizetype,
1970                         fold_convert (sizetype, offset), niters);
1971   DR_OFFSET (dr) = offset;
1972 }
1973
1974
1975 /* Function vect_update_inits_of_drs
1976
1977    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1978    This function updates the information recorded for the data references in
1979    the loop to account for the fact that the first NITERS iterations had
1980    already been executed.  Specifically, it updates the initial_condition of
1981    the access_function of all the data_references in the loop.  */
1982
1983 static void
1984 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1985 {
1986   unsigned int i;
1987   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1988   struct data_reference *dr;
1989
1990  if (dump_enabled_p ())
1991     dump_printf_loc (MSG_NOTE, vect_location,
1992                      "=== vect_update_inits_of_dr ===\n");
1993
1994   FOR_EACH_VEC_ELT (datarefs, i, dr)
1995     vect_update_init_of_dr (dr, niters);
1996 }
1997
1998
1999 /* Function vect_do_peeling_for_alignment
2000
2001    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2002    'niters' is set to the misalignment of one of the data references in the
2003    loop, thereby forcing it to refer to an aligned location at the beginning
2004    of the execution of this loop.  The data reference for which we are
2005    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2006
2007 void
2008 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
2009                                unsigned int th, bool check_profitability)
2010 {
2011   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2012   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2013   tree niters_of_prolog_loop;
2014   tree wide_prolog_niters;
2015   struct loop *new_loop;
2016   int max_iter;
2017   int bound = 0;
2018
2019   if (dump_enabled_p ())
2020     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2021                      "loop peeled for vectorization to enhance"
2022                      " alignment\n");
2023
2024   initialize_original_copy_tables ();
2025
2026   gimple_seq stmts = NULL;
2027   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2028   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2029                                                            ni_name,
2030                                                            &bound);
2031
2032   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2033   new_loop =
2034     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2035                                    loop_preheader_edge (loop),
2036                                    &niters_of_prolog_loop, ni_name, true,
2037                                    th, check_profitability, NULL_TREE, NULL,
2038                                    bound, 0);
2039
2040   gcc_assert (new_loop);
2041 #ifdef ENABLE_CHECKING
2042   slpeel_verify_cfg_after_peeling (new_loop, loop);
2043 #endif
2044   /* For vectorization factor N, we need to copy at most N-1 values
2045      for alignment and this means N-2 loopback edge executions.  */
2046   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2047   if (check_profitability)
2048     max_iter = MAX (max_iter, (int) th - 1);
2049   record_niter_bound (new_loop, max_iter, false, true);
2050   dump_printf (MSG_NOTE,
2051                "Setting upper bound of nb iterations for prologue "
2052                "loop to %d\n", max_iter);
2053
2054   /* Update number of times loop executes.  */
2055   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2056                 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2057   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2058                 TREE_TYPE (ni_name),
2059                 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2060
2061   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2062     wide_prolog_niters = niters_of_prolog_loop;
2063   else
2064     {
2065       gimple_seq seq = NULL;
2066       edge pe = loop_preheader_edge (loop);
2067       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2068       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2069       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2070                                                  var);
2071       if (seq)
2072         {
2073           /* Insert stmt on loop preheader edge.  */
2074           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2075           gcc_assert (!new_bb);
2076         }
2077     }
2078
2079   /* Update the init conditions of the access functions of all data refs.  */
2080   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2081
2082   /* After peeling we have to reset scalar evolution analyzer.  */
2083   scev_reset ();
2084
2085   free_original_copy_tables ();
2086 }
2087
2088
2089 /* Function vect_create_cond_for_align_checks.
2090
2091    Create a conditional expression that represents the alignment checks for
2092    all of data references (array element references) whose alignment must be
2093    checked at runtime.
2094
2095    Input:
2096    COND_EXPR  - input conditional expression.  New conditions will be chained
2097                 with logical AND operation.
2098    LOOP_VINFO - two fields of the loop information are used.
2099                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2100                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2101
2102    Output:
2103    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2104                          expression.
2105    The returned value is the conditional expression to be used in the if
2106    statement that controls which version of the loop gets executed at runtime.
2107
2108    The algorithm makes two assumptions:
2109      1) The number of bytes "n" in a vector is a power of 2.
2110      2) An address "a" is aligned if a%n is zero and that this
2111         test can be done as a&(n-1) == 0.  For example, for 16
2112         byte vectors the test is a&0xf == 0.  */
2113
2114 static void
2115 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2116                                    tree *cond_expr,
2117                                    gimple_seq *cond_expr_stmt_list)
2118 {
2119   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2120   vec<gimple> may_misalign_stmts
2121     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2122   gimple ref_stmt;
2123   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2124   tree mask_cst;
2125   unsigned int i;
2126   tree int_ptrsize_type;
2127   char tmp_name[20];
2128   tree or_tmp_name = NULL_TREE;
2129   tree and_tmp_name;
2130   gimple and_stmt;
2131   tree ptrsize_zero;
2132   tree part_cond_expr;
2133
2134   /* Check that mask is one less than a power of 2, i.e., mask is
2135      all zeros followed by all ones.  */
2136   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2137
2138   int_ptrsize_type = signed_type_for (ptr_type_node);
2139
2140   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2141      of the first vector of the i'th data reference. */
2142
2143   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2144     {
2145       gimple_seq new_stmt_list = NULL;
2146       tree addr_base;
2147       tree addr_tmp_name;
2148       tree new_or_tmp_name;
2149       gimple addr_stmt, or_stmt;
2150       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2151       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2152       bool negative = tree_int_cst_compare
2153         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2154       tree offset = negative
2155         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2156
2157       /* create: addr_tmp = (int)(address_of_first_vector) */
2158       addr_base =
2159         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2160                                               offset, loop);
2161       if (new_stmt_list != NULL)
2162         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2163
2164       sprintf (tmp_name, "addr2int%d", i);
2165       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2166       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2167       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2168
2169       /* The addresses are OR together.  */
2170
2171       if (or_tmp_name != NULL_TREE)
2172         {
2173           /* create: or_tmp = or_tmp | addr_tmp */
2174           sprintf (tmp_name, "orptrs%d", i);
2175           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2176           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2177                                          or_tmp_name, addr_tmp_name);
2178           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2179           or_tmp_name = new_or_tmp_name;
2180         }
2181       else
2182         or_tmp_name = addr_tmp_name;
2183
2184     } /* end for i */
2185
2186   mask_cst = build_int_cst (int_ptrsize_type, mask);
2187
2188   /* create: and_tmp = or_tmp & mask  */
2189   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2190
2191   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2192                                   or_tmp_name, mask_cst);
2193   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2194
2195   /* Make and_tmp the left operand of the conditional test against zero.
2196      if and_tmp has a nonzero bit then some address is unaligned.  */
2197   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2198   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2199                                 and_tmp_name, ptrsize_zero);
2200   if (*cond_expr)
2201     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2202                               *cond_expr, part_cond_expr);
2203   else
2204     *cond_expr = part_cond_expr;
2205 }
2206
2207 /* Function vect_create_cond_for_alias_checks.
2208
2209    Create a conditional expression that represents the run-time checks for
2210    overlapping of address ranges represented by a list of data references
2211    relations passed as input.
2212
2213    Input:
2214    COND_EXPR  - input conditional expression.  New conditions will be chained
2215                 with logical AND operation.  If it is NULL, then the function
2216                 is used to return the number of alias checks.
2217    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2218                 to be checked.
2219
2220    Output:
2221    COND_EXPR - conditional expression.
2222
2223    The returned COND_EXPR is the conditional expression to be used in the if
2224    statement that controls which version of the loop gets executed at runtime.
2225 */
2226
2227 void
2228 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2229 {
2230   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2231     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2232   tree part_cond_expr;
2233
2234   /* Create expression
2235      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2236      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2237      &&
2238      ...
2239      &&
2240      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2241      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2242
2243   if (comp_alias_ddrs.is_empty ())
2244     return;
2245
2246   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2247     {
2248       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2249       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2250       tree segment_length_a = dr_a.seg_len;
2251       tree segment_length_b = dr_b.seg_len;
2252
2253       tree addr_base_a
2254         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2255       tree addr_base_b
2256         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2257
2258       if (dump_enabled_p ())
2259         {
2260           dump_printf_loc (MSG_NOTE, vect_location,
2261                            "create runtime check for data references ");
2262           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2263           dump_printf (MSG_NOTE, " and ");
2264           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2265           dump_printf (MSG_NOTE, "\n");
2266         }
2267
2268       tree seg_a_min = addr_base_a;
2269       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2270       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2271          bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2272          [a, a+12) */
2273       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2274         {
2275           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2276           seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2277           seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2278         }
2279
2280       tree seg_b_min = addr_base_b;
2281       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2282       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2283         {
2284           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2285           seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2286           seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2287         }
2288
2289       part_cond_expr =
2290         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2291           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2292           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2293
2294       if (*cond_expr)
2295         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2296                                   *cond_expr, part_cond_expr);
2297       else
2298         *cond_expr = part_cond_expr;
2299     }
2300
2301   if (dump_enabled_p ())
2302     dump_printf_loc (MSG_NOTE, vect_location,
2303                      "created %u versioning for alias checks.\n",
2304                      comp_alias_ddrs.length ());
2305
2306   comp_alias_ddrs.release ();
2307 }
2308
2309
2310 /* Function vect_loop_versioning.
2311
2312    If the loop has data references that may or may not be aligned or/and
2313    has data reference relations whose independence was not proven then
2314    two versions of the loop need to be generated, one which is vectorized
2315    and one which isn't.  A test is then generated to control which of the
2316    loops is executed.  The test checks for the alignment of all of the
2317    data references that may or may not be aligned.  An additional
2318    sequence of runtime tests is generated for each pairs of DDRs whose
2319    independence was not proven.  The vectorized version of loop is
2320    executed only if both alias and alignment tests are passed.
2321
2322    The test generated to check which version of loop is executed
2323    is modified to also check for profitability as indicated by the
2324    cost model initially.
2325
2326    The versioning precondition(s) are placed in *COND_EXPR and
2327    *COND_EXPR_STMT_LIST.  */
2328
2329 void
2330 vect_loop_versioning (loop_vec_info loop_vinfo,
2331                       unsigned int th, bool check_profitability)
2332 {
2333   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2334   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2335   basic_block condition_bb;
2336   gphi_iterator gsi;
2337   gimple_stmt_iterator cond_exp_gsi;
2338   basic_block merge_bb;
2339   basic_block new_exit_bb;
2340   edge new_exit_e, e;
2341   gphi *orig_phi, *new_phi;
2342   tree cond_expr = NULL_TREE;
2343   gimple_seq cond_expr_stmt_list = NULL;
2344   tree arg;
2345   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2346   gimple_seq gimplify_stmt_list = NULL;
2347   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2348   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2349   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2350
2351   if (check_profitability)
2352     {
2353       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2354                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2355       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2356                                           is_gimple_condexpr, NULL_TREE);
2357     }
2358
2359   if (version_align)
2360     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2361                                        &cond_expr_stmt_list);
2362
2363   if (version_alias)
2364     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2365
2366   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2367                                       is_gimple_condexpr, NULL_TREE);
2368   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2369
2370   initialize_original_copy_tables ();
2371   if (scalar_loop)
2372     {
2373       edge scalar_e;
2374       basic_block preheader, scalar_preheader;
2375
2376       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2377          scale LOOP's frequencies instead.  */
2378       loop_version (scalar_loop, cond_expr, &condition_bb,
2379                     prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2380       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2381       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2382          while we need to move it above LOOP's preheader.  */
2383       e = loop_preheader_edge (loop);
2384       scalar_e = loop_preheader_edge (scalar_loop);
2385       gcc_assert (empty_block_p (e->src)
2386                   && single_pred_p (e->src));
2387       gcc_assert (empty_block_p (scalar_e->src)
2388                   && single_pred_p (scalar_e->src));
2389       gcc_assert (single_pred_p (condition_bb));
2390       preheader = e->src;
2391       scalar_preheader = scalar_e->src;
2392       scalar_e = find_edge (condition_bb, scalar_preheader);
2393       e = single_pred_edge (preheader);
2394       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2395                                       scalar_preheader);
2396       redirect_edge_and_branch_force (scalar_e, preheader);
2397       redirect_edge_and_branch_force (e, condition_bb);
2398       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2399                                single_pred (condition_bb));
2400       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2401                                single_pred (scalar_preheader));
2402       set_immediate_dominator (CDI_DOMINATORS, preheader,
2403                                condition_bb);
2404     }
2405   else
2406     loop_version (loop, cond_expr, &condition_bb,
2407                   prob, prob, REG_BR_PROB_BASE - prob, true);
2408
2409   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2410       && dump_enabled_p ())
2411     {
2412       if (version_alias)
2413         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414                          "loop versioned for vectorization because of "
2415                          "possible aliasing\n");
2416       if (version_align)
2417         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2418                          "loop versioned for vectorization to enhance "
2419                          "alignment\n");
2420
2421     }
2422   free_original_copy_tables ();
2423
2424   /* Loop versioning violates an assumption we try to maintain during
2425      vectorization - that the loop exit block has a single predecessor.
2426      After versioning, the exit block of both loop versions is the same
2427      basic block (i.e. it has two predecessors). Just in order to simplify
2428      following transformations in the vectorizer, we fix this situation
2429      here by adding a new (empty) block on the exit-edge of the loop,
2430      with the proper loop-exit phis to maintain loop-closed-form.
2431      If loop versioning wasn't done from loop, but scalar_loop instead,
2432      merge_bb will have already just a single successor.  */
2433
2434   merge_bb = single_exit (loop)->dest;
2435   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2436     {
2437       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2438       new_exit_bb = split_edge (single_exit (loop));
2439       new_exit_e = single_exit (loop);
2440       e = EDGE_SUCC (new_exit_bb, 0);
2441
2442       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2443         {
2444           tree new_res;
2445           orig_phi = gsi.phi ();
2446           new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2447           new_phi = create_phi_node (new_res, new_exit_bb);
2448           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2449           add_phi_arg (new_phi, arg, new_exit_e,
2450                        gimple_phi_arg_location_from_edge (orig_phi, e));
2451           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2452         }
2453     }
2454
2455   /* End loop-exit-fixes after versioning.  */
2456
2457   if (cond_expr_stmt_list)
2458     {
2459       cond_exp_gsi = gsi_last_bb (condition_bb);
2460       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2461                              GSI_SAME_STMT);
2462     }
2463   update_ssa (TODO_update_ssa);
2464 }