gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "tree.h"
  28 #include "basic-block.h"
  29 #include "gimple-pretty-print.h"
  30 #include "tree-ssa-alias.h"
  31 #include "internal-fn.h"
  32 #include "gimple-expr.h"
  33 #include "is-a.h"
  34 #include "gimple.h"
  35 #include "gimplify.h"
  36 #include "gimple-iterator.h"
  37 #include "gimplify-me.h"
  38 #include "gimple-ssa.h"
  39 #include "tree-cfg.h"
  40 #include "tree-phinodes.h"
  41 #include "ssa-iterators.h"
  42 #include "stringpool.h"
  43 #include "tree-ssanames.h"
  44 #include "tree-ssa-loop-manip.h"
  45 #include "tree-into-ssa.h"
  46 #include "tree-ssa.h"
  47 #include "tree-pass.h"
  48 #include "cfgloop.h"
  49 #include "diagnostic-core.h"
  50 #include "tree-scalar-evolution.h"
  51 #include "tree-vectorizer.h"
  52 #include "langhooks.h"
  53
  54 /*************************************************************************
  55   Simple Loop Peeling Utilities
  56
  57   Utilities to support loop peeling for vectorization purposes.
  58  *************************************************************************/
  59
  60
  61 /* Renames the use *OP_P.  */
  62
  63 static void
  64 rename_use_op (use_operand_p op_p)
  65 {
  66   tree new_name;
  67
  68   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  69     return;
  70
  71   new_name = get_current_def (USE_FROM_PTR (op_p));
  72
  73   /* Something defined outside of the loop.  */
  74   if (!new_name)
  75     return;
  76
  77   /* An ordinary ssa name defined in the loop.  */
  78
  79   SET_USE (op_p, new_name);
  80 }
  81
  82
  83 /* Renames the variables in basic block BB.  */
  84
  85 static void
  86 rename_variables_in_bb (basic_block bb)
  87 {
  88   gimple_stmt_iterator gsi;
  89   gimple stmt;
  90   use_operand_p use_p;
  91   ssa_op_iter iter;
  92   edge e;
  93   edge_iterator ei;
  94   struct loop *loop = bb->loop_father;
  95
  96   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  97     {
  98       stmt = gsi_stmt (gsi);
  99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 100         rename_use_op (use_p);
 101     }
 102
 103   FOR_EACH_EDGE (e, ei, bb->preds)
 104     {
 105       if (!flow_bb_inside_loop_p (loop, e->src))
 106         continue;
 107       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 108         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
 109     }
 110 }
 111
 112
 113 typedef struct
 114 {
 115   tree from, to;
 116   basic_block bb;
 117 } adjust_info;
 118
 119 /* A stack of values to be adjusted in debug stmts.  We have to
 120    process them LIFO, so that the closest substitution applies.  If we
 121    processed them FIFO, without the stack, we might substitute uses
 122    with a PHI DEF that would soon become non-dominant, and when we got
 123    to the suitable one, it wouldn't have anything to substitute any
 124    more.  */
 125 static vec<adjust_info, va_heap> adjust_vec;
 126
 127 /* Adjust any debug stmts that referenced AI->from values to use the
 128    loop-closed AI->to, if the references are dominated by AI->bb and
 129    not by the definition of AI->from.  */
 130
 131 static void
 132 adjust_debug_stmts_now (adjust_info *ai)
 133 {
 134   basic_block bbphi = ai->bb;
 135   tree orig_def = ai->from;
 136   tree new_def = ai->to;
 137   imm_use_iterator imm_iter;
 138   gimple stmt;
 139   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 140
 141   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 142
 143   /* Adjust any debug stmts that held onto non-loop-closed
 144      references.  */
 145   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 146     {
 147       use_operand_p use_p;
 148       basic_block bbuse;
 149
 150       if (!is_gimple_debug (stmt))
 151         continue;
 152
 153       gcc_assert (gimple_debug_bind_p (stmt));
 154
 155       bbuse = gimple_bb (stmt);
 156
 157       if ((bbuse == bbphi
 158            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 159           && !(bbuse == bbdef
 160                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 161         {
 162           if (new_def)
 163             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 164               SET_USE (use_p, new_def);
 165           else
 166             {
 167               gimple_debug_bind_reset_value (stmt);
 168               update_stmt (stmt);
 169             }
 170         }
 171     }
 172 }
 173
 174 /* Adjust debug stmts as scheduled before.  */
 175
 176 static void
 177 adjust_vec_debug_stmts (void)
 178 {
 179   if (!MAY_HAVE_DEBUG_STMTS)
 180     return;
 181
 182   gcc_assert (adjust_vec.exists ());
 183
 184   while (!adjust_vec.is_empty ())
 185     {
 186       adjust_debug_stmts_now (&adjust_vec.last ());
 187       adjust_vec.pop ();
 188     }
 189
 190   adjust_vec.release ();
 191 }
 192
 193 /* Adjust any debug stmts that referenced FROM values to use the
 194    loop-closed TO, if the references are dominated by BB and not by
 195    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 196    will be postponed until adjust_vec_debug_stmts is called.  */
 197
 198 static void
 199 adjust_debug_stmts (tree from, tree to, basic_block bb)
 200 {
 201   adjust_info ai;
 202
 203   if (MAY_HAVE_DEBUG_STMTS
 204       && TREE_CODE (from) == SSA_NAME
 205       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 206       && ! virtual_operand_p (from))
 207     {
 208       ai.from = from;
 209       ai.to = to;
 210       ai.bb = bb;
 211
 212       if (adjust_vec.exists ())
 213         adjust_vec.safe_push (ai);
 214       else
 215         adjust_debug_stmts_now (&ai);
 216     }
 217 }
 218
 219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 220    to adjust any debug stmts that referenced the old phi arg,
 221    presumably non-loop-closed references left over from other
 222    transformations.  */
 223
 224 static void
 225 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 226 {
 227   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 228
 229   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 230
 231   if (MAY_HAVE_DEBUG_STMTS)
 232     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 233                         gimple_bb (update_phi));
 234 }
 235
 236
 237 /* Update PHI nodes for a guard of the LOOP.
 238
 239    Input:
 240    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 241         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 242         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 243         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 244         We denote this bb NEW_MERGE_BB because before the guard code was added
 245         it had a single predecessor (the LOOP header), and now it became a merge
 246         point of two paths - the path that ends with the LOOP exit-edge, and
 247         the path that ends with GUARD_EDGE.
 248    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 249         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 250
 251    ===> The CFG before the guard-code was added:
 252         LOOP_header_bb:
 253           loop_body
 254           if (exit_loop) goto update_bb
 255           else           goto LOOP_header_bb
 256         update_bb:
 257
 258    ==> The CFG after the guard-code was added:
 259         guard_bb:
 260           if (LOOP_guard_condition) goto new_merge_bb
 261           else                      goto LOOP_header_bb
 262         LOOP_header_bb:
 263           loop_body
 264           if (exit_loop_condition) goto new_merge_bb
 265           else                     goto LOOP_header_bb
 266         new_merge_bb:
 267           goto update_bb
 268         update_bb:
 269
 270    ==> The CFG after this function:
 271         guard_bb:
 272           if (LOOP_guard_condition) goto new_merge_bb
 273           else                      goto LOOP_header_bb
 274         LOOP_header_bb:
 275           loop_body
 276           if (exit_loop_condition) goto new_exit_bb
 277           else                     goto LOOP_header_bb
 278         new_exit_bb:
 279         new_merge_bb:
 280           goto update_bb
 281         update_bb:
 282
 283    This function:
 284    1. creates and updates the relevant phi nodes to account for the new
 285       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 286       1.1. Create phi nodes at NEW_MERGE_BB.
 287       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 288            UPDATE_BB).  UPDATE_BB was LOOP's exit-bb before the guard was added.
 289    2. preserves loop-closed-ssa-form by creating the required phi nodes
 290       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 291
 292    There are two flavors to this function:
 293
 294    slpeel_update_phi_nodes_for_guard1:
 295      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 296      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 297      for variables that have phis in the loop header.
 298
 299    slpeel_update_phi_nodes_for_guard2:
 300      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 301      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 302      for variables that have phis in the loop exit.
 303
 304    I.E., the overall structure is:
 305
 306         loop1_preheader_bb:
 307                 guard1 (goto loop1/merge1_bb)
 308         loop1
 309         loop1_exit_bb:
 310                 guard2 (goto merge1_bb/merge2_bb)
 311         merge1_bb
 312         loop2
 313         loop2_exit_bb
 314         merge2_bb
 315         next_bb
 316
 317    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 318    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 319    that have phis in loop1->header).
 320
 321    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 322    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 323    that have phis in next_bb). It also adds some of these phis to
 324    loop1_exit_bb.
 325
 326    slpeel_update_phi_nodes_for_guard1 is always called before
 327    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 328    to create correct data-flow and loop-closed-ssa-form.
 329
 330    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 331    that change between iterations of a loop (and therefore have a phi-node
 332    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 333    phis for variables that are used out of the loop (and therefore have
 334    loop-closed exit phis). Some variables may be both updated between
 335    iterations and used after the loop. This is why in loop1_exit_bb we
 336    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 337    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 338
 339    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 340      an original loop. i.e., we have:
 341
 342            orig_loop
 343            guard_bb (goto LOOP/new_merge)
 344            new_loop <-- LOOP
 345            new_exit
 346            new_merge
 347            next_bb
 348
 349      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 350      have:
 351
 352            new_loop
 353            guard_bb (goto LOOP/new_merge)
 354            orig_loop <-- LOOP
 355            new_exit
 356            new_merge
 357            next_bb
 358
 359      The SSA names defined in the original loop have a current
 360      reaching definition that records the corresponding new
 361      ssa-name used in the new duplicated loop copy.
 362   */
 363
 364 /* Function slpeel_update_phi_nodes_for_guard1
 365
 366    Input:
 367    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 368    - NEW_EXIT_BB is also an output: *NEW_EXIT_BB receives the block created
 369      here by splitting the single exit edge of LOOP.
 370
 371    In the context of the overall structure, we have:
 372
 373         loop1_preheader_bb:
 374                 guard1 (goto loop1/merge1_bb)
 375 LOOP->  loop1
 376         loop1_exit_bb:
 377                 guard2 (goto merge1_bb/merge2_bb)
 378         merge1_bb
 379         loop2
 380         loop2_exit_bb
 381         merge2_bb
 382         next_bb
 383
 384    For each name updated between loop iterations (i.e - for each name that has
 385    an entry (loop-header) phi in LOOP) we create a new phi in:
 386    1. merge1_bb (to account for the edge from guard1)
 387    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 388 */
 389
 390 static void
 391 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 392                                     bool is_new_loop, basic_block *new_exit_bb)
 393 {
 394   gimple orig_phi, new_phi;
 395   gimple update_phi, update_phi2;
 396   tree guard_arg, loop_arg;
 397   basic_block new_merge_bb = guard_edge->dest;
 398   edge e = EDGE_SUCC (new_merge_bb, 0);
 399   basic_block update_bb = e->dest;
 400   basic_block orig_bb = loop->header;
 401   edge new_exit_e;
 402   tree current_new_name;
 403   gimple_stmt_iterator gsi_orig, gsi_update;
 404
 405   /* Create new bb between loop and new_merge_bb.  */
 406   *new_exit_bb = split_edge (single_exit (loop));
 407
 408   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 409
       /* Walk the phis of LOOP's header and the phis of UPDATE_BB in lockstep,
          pairing them by position; the walk stops as soon as either list is
          exhausted.  The assert in 1.3 checks that each pair really matches.  */
 410   for (gsi_orig = gsi_start_phis (orig_bb),
 411        gsi_update = gsi_start_phis (update_bb);
 412        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 413        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 414     {
 415       source_location loop_locus, guard_locus;
 416       tree new_res;
 417       orig_phi = gsi_stmt (gsi_orig);
 418       update_phi = gsi_stmt (gsi_update);
 419
 420       /** 1. Handle new-merge-point phis  **/
 421
 422       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 423       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 424       new_phi = create_phi_node (new_res, new_merge_bb);
 425
 426       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 427             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 428       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 429       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 430                                                       EDGE_SUCC (loop->latch,
 431                                                                  0));
 432       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 433       guard_locus
 434         = gimple_phi_arg_location_from_edge (orig_phi,
 435                                              loop_preheader_edge (loop));
 436
 437       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 438       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 439
 440       /* 1.3. Update phi in successor block.  */
 441       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 442                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 443       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
           /* The merge phi just created is the one whose exit-edge argument
              gets replaced in step 2.3.  */
 444       update_phi2 = new_phi;
 445
 446
 447       /** 2. Handle loop-closed-ssa-form phis  **/
 448
           /* Virtual operands only get the merge phi created above; no exit
              phi is created for them.  */
 449       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 450         continue;
 451
 452       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 453       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 454       new_phi = create_phi_node (new_res, *new_exit_bb);
 455
 456       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 457       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 458
 459       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 460       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 461       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 462                                   PHI_RESULT (new_phi));
 463
 464       /* 2.4. Record the newly created name with set_current_def.
 465          We want to find a name such that
 466                 name = get_current_def (orig_loop_name)
 467          and to set its current definition as follows:
 468                 set_current_def (name, new_phi_name)
 469
 470          If LOOP is a new loop then loop_arg is already the name we're
 471          looking for. If LOOP is the original loop, then loop_arg is
 472          the orig_loop_name and the relevant name is recorded in its
 473          current reaching definition.  */
 474       if (is_new_loop)
 475         current_new_name = loop_arg;
 476       else
 477         {
 478           current_new_name = get_current_def (loop_arg);
 479           /* current_def is not available only if the variable does not
 480              change inside the loop, in which case we also don't care
 481              about recording a current_def for it because we won't be
 482              trying to create loop-exit-phis for it.  */
 483           if (!current_new_name)
 484             continue;
 485         }
 486       tree new_name = get_current_def (current_new_name);
 487       /* Because of peeled_chrec optimization it is possible that we have
 488          set this earlier.  Verify the PHI has the same value.  */
 489       if (new_name)
 490         {
 491           gimple phi = SSA_NAME_DEF_STMT (new_name);
 492           gcc_assert (gimple_code (phi) == GIMPLE_PHI
 493                       && gimple_bb (phi) == *new_exit_bb
 494                       && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
 495                           == loop_arg));
 496           continue;
 497         }
 498
 499       set_current_def (current_new_name, PHI_RESULT (new_phi));
 500     }
 501 }
 502
 503
 504 /* Function slpeel_update_phi_nodes_for_guard2
 505
 506    Input:
 507    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 508
 509    In the context of the overall structure, we have:
 510
 511         loop1_preheader_bb:
 512                 guard1 (goto loop1/merge1_bb)
 513         loop1
 514         loop1_exit_bb:
 515                 guard2 (goto merge1_bb/merge2_bb)
 516         merge1_bb
 517 LOOP->  loop2
 518         loop2_exit_bb
 519         merge2_bb
 520         next_bb
 521
 522    For each name used outside the loop (i.e - for each name that has an exit
 523    phi in next_bb) we create a new phi in:
 524    1. merge2_bb (to account for the edge from guard_bb)
 525    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 526    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 527       if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
 528 */
 529
 530 static void
 531 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 532                                     bool is_new_loop, basic_block *new_exit_bb)
 533 {
 534   gimple orig_phi, new_phi;
 535   gimple update_phi, update_phi2;
 536   tree guard_arg, loop_arg;
 537   basic_block new_merge_bb = guard_edge->dest;
 538   edge e = EDGE_SUCC (new_merge_bb, 0);
 539   basic_block update_bb = e->dest;
 540   edge new_exit_e;
 541   tree orig_def, orig_def_new_name;
 542   tree new_name, new_name2;
 543   tree arg;
 544   gimple_stmt_iterator gsi;
 545
 546   /* Create new bb between loop and new_merge_bb.  */
 547   *new_exit_bb = split_edge (single_exit (loop));
 548
 549   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 550
       /* Only the phis of UPDATE_BB are walked here; each one whose argument
          on edge E is an SSA name gets new phis in NEW_MERGE_BB, NEW_EXIT_BB
          and, when step 3 applies, in the source block of GUARD_EDGE.  */
 551   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 552     {
 553       tree new_res;
 554       update_phi = gsi_stmt (gsi);
 555       orig_phi = update_phi;
 556       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 557       /* This loop-closed-phi actually doesn't represent a use
 558          out of the loop - the phi arg is a constant.  */
 559       if (TREE_CODE (orig_def) != SSA_NAME)
 560         continue;
 561       orig_def_new_name = get_current_def (orig_def);
 562       arg = NULL_TREE;
 563
 564       /** 1. Handle new-merge-point phis  **/
 565
 566       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 567       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 568       new_phi = create_phi_node (new_res, new_merge_bb);
 569
 570       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 571             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 572       new_name = orig_def;
 573       new_name2 = NULL_TREE;
 574       if (orig_def_new_name)
 575         {
 576           new_name = orig_def_new_name;
 577           /* Some variables have both loop-entry-phis and loop-exit-phis.
 578              Such variables were given yet newer names by phis placed in
 579              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 580              new_name2 = get_current_def (get_current_def (orig_name)).  */
 581           new_name2 = get_current_def (new_name);
 582         }
 583
           /* Which of ORIG_DEF / NEW_NAME flows in from LOOP and which from the
              guard depends on whether LOOP is the copy or the original.  */
 584       if (is_new_loop)
 585         {
 586           guard_arg = orig_def;
 587           loop_arg = new_name;
 588         }
 589       else
 590         {
 591           guard_arg = new_name;
 592           loop_arg = orig_def;
 593         }
           /* A name recorded by slpeel_update_phi_nodes_for_guard1 takes
              precedence on the guard edge.  */
 594       if (new_name2)
 595         guard_arg = new_name2;
 596
 597       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 598       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 599
 600       /* 1.3. Update phi in successor block.  */
 601       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 602       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 603       update_phi2 = new_phi;
 604
 605
 606       /** 2. Handle loop-closed-ssa-form phis  **/
 607
 608       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 609       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 610       new_phi = create_phi_node (new_res, *new_exit_bb);
 611
 612       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 613       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 614
 615       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 616       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 617       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 618                                   PHI_RESULT (new_phi));
 619
 620
 621       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 622
 623       /* 3.1. Find the relevant names that need an exit-phi in
 624          GUARD_BB, i.e. names for which
 625          slpeel_update_phi_nodes_for_guard1 had not already created a
 626          phi node. This is the case for names that are used outside
 627          the loop (and therefore need an exit phi) but are not updated
 628          across loop iterations (and therefore don't have a
 629          loop-header-phi).
 630
 631          slpeel_update_phi_nodes_for_guard1 is responsible for
 632          creating loop-exit phis in GUARD_BB for names that have a
 633          loop-header-phi.  When such a phi is created we also record
 634          the new name in its current definition.  If this new name
 635          exists, then guard_arg was set to this new name (see 1.2
 636          above).  Therefore, if guard_arg is not this new name, this
 637          is an indication that an exit-phi in GUARD_BB was not yet
 638          created, so we take care of it here.  */
 639       if (guard_arg == new_name2)
 640         continue;
 641       arg = guard_arg;
 642
 643       /* 3.2. Generate new phi node in GUARD_BB (i.e. guard_edge->src):  */
 644       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 645       new_phi = create_phi_node (new_res, guard_edge->src);
 646
 647       /* 3.3. GUARD_BB has one incoming edge:  */
 648       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 649       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 650                    UNKNOWN_LOCATION);
 651
 652       /* 3.4. Update phi in successor of GUARD_BB:  */
 653       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 654                                                                 == guard_arg);
 655       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 656                                   PHI_RESULT (new_phi));
 657     }
 658 }
 659
 660
 661 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 662    that starts at zero, increases by one and its limit is NITERS.
 663
 664    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 665
 666 void
 667 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 668 {
 669   tree indx_before_incr, indx_after_incr;
 670   gimple cond_stmt;
 671   gimple orig_cond;
 672   edge exit_edge = single_exit (loop);
 673   gimple_stmt_iterator loop_cond_gsi;
 674   gimple_stmt_iterator incr_gsi;
 675   bool insert_after;
 676   tree init = build_int_cst (TREE_TYPE (niters), 0);
 677   tree step = build_int_cst (TREE_TYPE (niters), 1);
 678   source_location loop_loc;
 679   enum tree_code code;
 680
 681   orig_cond = get_loop_exit_condition (loop);
 682   gcc_assert (orig_cond);
 683   loop_cond_gsi = gsi_for_stmt (orig_cond);
 684
 685   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 686   create_iv (init, step, NULL_TREE, loop,
 687              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 688
 689   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 690                                               true, NULL_TREE, true,
 691                                               GSI_SAME_STMT);
 692   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 693                                      true, GSI_SAME_STMT);
 694
 695   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 696   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 697                                  NULL_TREE);
 698
 699   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 700
 701   /* Remove old loop exit test:  */
 702   gsi_remove (&loop_cond_gsi, true);
 703   free_stmt_vec_info (orig_cond);
 704
 705   loop_loc = find_loop_location (loop);
 706   if (dump_enabled_p ())
 707     {
 708       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 709         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 710                      LOCATION_LINE (loop_loc));
 711       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 712       dump_printf (MSG_NOTE, "\n");
 713     }
 714   loop->nb_iterations = niters;
 715 }
 716
 717 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 718    For all PHI arguments in FROM->dest and TO->dest from those
 719    edges ensure that TO->dest PHI arguments have current_def
 720    to that in from.  */
 721
 722 static void
 723 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 724 {
 725   gimple_stmt_iterator gsi_from, gsi_to;
 726
 727   for (gsi_from = gsi_start_phis (from->dest),
 728        gsi_to = gsi_start_phis (to->dest);
 729        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
 730        gsi_next (&gsi_from), gsi_next (&gsi_to))
 731     {
 732       gimple from_phi = gsi_stmt (gsi_from);
 733       gimple to_phi = gsi_stmt (gsi_to);
 734       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 735       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 736       if (TREE_CODE (from_arg) == SSA_NAME
 737           && TREE_CODE (to_arg) == SSA_NAME
 738           && get_current_def (to_arg) == NULL_TREE)
 739         set_current_def (to_arg, get_current_def (from_arg));
 740     }
 741 }
 742
 743
 744 /* Given LOOP this function generates a new copy of it and puts it
 745    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 746    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 747    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 748    entry or exit of LOOP.  */
 749
 750 struct loop *
 751 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 752                                         struct loop *scalar_loop, edge e)
 753 {
 754   struct loop *new_loop;
 755   basic_block *new_bbs, *bbs;
 756   bool at_exit;
 757   bool was_imm_dom;
 758   basic_block exit_dest;
 759   edge exit, new_exit;
 760
 761   exit = single_exit (loop);
 762   at_exit = (e == exit);
 763   if (!at_exit && e != loop_preheader_edge (loop))
 764     return NULL;
 765
 766   if (scalar_loop == NULL)
 767     scalar_loop = loop;
 768
 769   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 770   get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
 771
 772   /* Check whether duplication is possible.  */
 773   if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
 774     {
 775       free (bbs);
 776       return NULL;
 777     }
 778
 779   /* Generate new loop structure.  */
 780   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 781   duplicate_subloops (scalar_loop, new_loop);
 782
 783   exit_dest = exit->dest;
 784   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 785                                           exit_dest) == loop->header ?
 786                  true : false);
 787
 788   /* Also copy the pre-header, this avoids jumping through hoops to
 789      duplicate the loop entry PHI arguments.  Create an empty
 790      pre-header unconditionally for this.  */
 791   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 792   edge entry_e = single_pred_edge (preheader);
 793   bbs[scalar_loop->num_nodes] = preheader;
 794   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 795
 796   exit = single_exit (scalar_loop);
 797   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 798             &exit, 1, &new_exit, NULL,
 799             e->src, true);
 800   exit = single_exit (loop);
 801   basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
 802
 803   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 804
 805   if (scalar_loop != loop)
 806     {
 807       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 808          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 809          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 810          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 811          header) to have current_def set, so copy them over.  */
 812       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 813                                                 exit);
 814       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 815                                                            0),
 816                                                 EDGE_SUCC (loop->latch, 0));
 817     }
 818
 819   if (at_exit) /* Add the loop copy at exit.  */
 820     {
 821       if (scalar_loop != loop)
 822         {
 823           gimple_stmt_iterator gsi;
 824           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 825
 826           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 827                gsi_next (&gsi))
 828             {
 829               gimple phi = gsi_stmt (gsi);
 830               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 831               location_t orig_locus
 832                 = gimple_phi_arg_location_from_edge (phi, e);
 833
 834               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 835             }
 836         }
 837       redirect_edge_and_branch_force (e, new_preheader);
 838       flush_pending_stmts (e);
 839       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 840       if (was_imm_dom)
 841         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 842
 843       /* And remove the non-necessary forwarder again.  Keep the other
 844          one so we have a proper pre-header for the loop at the exit edge.  */
 845       redirect_edge_pred (single_succ_edge (preheader),
 846                           single_pred (preheader));
 847       delete_basic_block (preheader);
 848       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 849                                loop_preheader_edge (scalar_loop)->src);
 850     }
 851   else /* Add the copy at entry.  */
 852     {
 853       if (scalar_loop != loop)
 854         {
 855           /* Remove the non-necessary forwarder of scalar_loop again.  */
 856           redirect_edge_pred (single_succ_edge (preheader),
 857                               single_pred (preheader));
 858           delete_basic_block (preheader);
 859           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 860                                    loop_preheader_edge (scalar_loop)->src);
 861           preheader = split_edge (loop_preheader_edge (loop));
 862           entry_e = single_pred_edge (preheader);
 863         }
 864
 865       redirect_edge_and_branch_force (entry_e, new_preheader);
 866       flush_pending_stmts (entry_e);
 867       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 868
 869       redirect_edge_and_branch_force (new_exit, preheader);
 870       flush_pending_stmts (new_exit);
 871       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 872
 873       /* And remove the non-necessary forwarder again.  Keep the other
 874          one so we have a proper pre-header for the loop at the exit edge.  */
 875       redirect_edge_pred (single_succ_edge (new_preheader),
 876                           single_pred (new_preheader));
 877       delete_basic_block (new_preheader);
 878       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 879                                loop_preheader_edge (new_loop)->src);
 880     }
 881
 882   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 883     rename_variables_in_bb (new_bbs[i]);
 884
 885   if (scalar_loop != loop)
 886     {
 887       /* Update new_loop->header PHIs, so that on the preheader
 888          edge they are the ones from loop rather than scalar_loop.  */
 889       gimple_stmt_iterator gsi_orig, gsi_new;
 890       edge orig_e = loop_preheader_edge (loop);
 891       edge new_e = loop_preheader_edge (new_loop);
 892
 893       for (gsi_orig = gsi_start_phis (loop->header),
 894            gsi_new = gsi_start_phis (new_loop->header);
 895            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 896            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 897         {
 898           gimple orig_phi = gsi_stmt (gsi_orig);
 899           gimple new_phi = gsi_stmt (gsi_new);
 900           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 901           location_t orig_locus
 902             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 903
 904           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 905         }
 906     }
 907
 908   free (new_bbs);
 909   free (bbs);
 910
 911 #ifdef ENABLE_CHECKING
 912   verify_dominators (CDI_DOMINATORS);
 913 #endif
 914
 915   return new_loop;
 916 }
 917
 918
 919 /* End GUARD_BB with a branch on COND, preceded by the statements of
 920    COND_EXPR_STMT_LIST and those needed to gimplify COND.  Successor
 921    edge 0 of GUARD_BB becomes the false edge; a new true edge to EXIT_BB
 922    (the block that skips the loop) gets PROBABILITY and is returned.
 923    DOM_BB is made the immediate dominator of EXIT_BB.  */
 924
 925 static edge
 926 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 927                        gimple_seq cond_expr_stmt_list,
 928                        basic_block exit_bb, basic_block dom_bb,
 929                        int probability)
 930 {
 931   gimple_stmt_iterator gsi;
 932   edge new_e, enter_e;
 933   gimple cond_stmt;
 934   gimple_seq gimplify_stmt_list = NULL;
 935
 936   enter_e = EDGE_SUCC (guard_bb, 0);
 937   enter_e->flags &= ~EDGE_FALLTHRU;
 938   enter_e->flags |= EDGE_FALSE_VALUE;
 939   gsi = gsi_last_bb (guard_bb);
 940
 941   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 942                                  NULL_TREE);
 943   if (gimplify_stmt_list)
 944     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 945   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 946   if (cond_expr_stmt_list)
 947     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 948
 949   gsi = gsi_last_bb (guard_bb);
 950   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 951
 952   /* Add the edge to the merge/loop-exit block.  It takes PROBABILITY of
 953      the entering edge's count; the entering edge keeps the remainder.  */
 954   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 955   new_e->probability = probability;
 956   new_e->count = apply_probability (enter_e->count, probability);
 957   enter_e->count -= new_e->count;
 958   enter_e->probability = inverse_probability (probability);
 959   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 960   return new_e;
 961 }
 962
 963
 964 /* Return true if LOOP satisfies all of the following restrictions:
 965    (1) it is innermost
 966    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 967    (3) it is single entry, single exit
 968    (4) its exit condition is the last stmt of the exit edge's source block
 969    (5) E is the entry/exit edge of LOOP.
 970    Return false otherwise, including when LOOP has no single exit.  */
 971
 972 bool
 973 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 974 {
 975   edge exit_e = single_exit (loop);
 976   edge entry_e = loop_preheader_edge (loop);
 977   gimple orig_cond = get_loop_exit_condition (loop);
 978
 979   if (loop->inner
 980       /* All loops have an outer scope; the only case loop->outer is NULL is for
 981          the function itself.  */
 982       || !loop_outer (loop)
 983       || loop->num_nodes != 2
 984       || !empty_block_p (loop->latch)
 985       /* EXIT_E is NULL if there is no single exit; test before use.  */
 986       || !exit_e
 987       /* Verify that new loop exit condition can be trivially modified.  */
 988       || (!orig_cond || orig_cond != gsi_stmt (gsi_last_bb (exit_e->src)))
 989       || (e != exit_e && e != entry_e))
 990     return false;
 991
 992   return true;
 993 }
 994
 995 #ifdef ENABLE_CHECKING
 996 /* Sanity-check the CFG produced by peeling: the block FIRST_LOOP exits
 997    to must be a two-way guard, and the preheader of SECOND_LOOP must be
 998    reached from that guard and from the block preceding FIRST_LOOP.  */
 999
1000 static void
1001 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002                                  struct loop *second_loop)
1003 {
1004   basic_block guard_bb = single_exit (first_loop)->dest;
1005   basic_block second_preheader = loop_preheader_edge (second_loop)->src;
1006   basic_block first_preheader = loop_preheader_edge (first_loop)->src;
1007
1008   /* The block FIRST_LOOP exits to holds the guard deciding whether
1009      SECOND_LOOP runs, hence it needs exactly two successors: the
1010      preheader of SECOND_LOOP and a block after SECOND_LOOP.  */
1011   gcc_assert (EDGE_COUNT (guard_bb->succs) == 2);
1012
1013   /* The preheader of SECOND_LOOP must be reached from exactly two
1014      blocks, in either order: the guard block above and the block
1015      that precedes FIRST_LOOP.  */
1016   gcc_assert (EDGE_COUNT (second_preheader->preds) == 2);
1017
1018   basic_block pred0 = EDGE_PRED (second_preheader, 0)->src;
1019   basic_block pred1 = EDGE_PRED (second_preheader, 1)->src;
1020
1021   gcc_assert ((pred0 == guard_bb && pred1 == first_preheader)
1022               || (pred1 == guard_bb && pred0 == first_preheader));
1023
1024   /* TODO: verify that the other successor of the guard block lies
1025      after SECOND_LOOP.  */
1026 }
1027 #endif
1028
1029 /* If the run time cost model check determines that vectorization is
1030    not profitable and hence scalar loop should be generated then set
1031    FIRST_NITERS to the scalar iteration count of LOOP (the value of
1032    LOOP_VINFO_NITERS_UNCHANGED).  This will allow all the iterations to
        be executed in the prologue peeled scalar loop.

        The edge entering BB_BEFORE_FIRST_LOOP is split twice, giving
        COND_BB followed by THEN_BB.  COND_BB is ended with the test
        "scalar iteration count <= TH"; its true edge, which is given
        PROBABILITY, leads to THEN_BB and its false edge goes straight to
        BB_BEFORE_FIRST_LOOP.  A new PHI in BB_BEFORE_FIRST_LOOP selects
        the scalar iteration count when coming from THEN_BB and the
        incoming *FIRST_NITERS otherwise; *FIRST_NITERS is replaced by the
        result of that PHI.  */
1033
1034 static void
1035 set_prologue_iterations (basic_block bb_before_first_loop,
1036                          tree *first_niters,
1037                          struct loop *loop,
1038                          unsigned int th,
1039                          int probability)
1040 {
1041   edge e;
1042   basic_block cond_bb, then_bb;
1043   tree var, prologue_after_cost_adjust_name;
1044   gimple_stmt_iterator gsi;
1045   gimple newphi;
1046   edge e_true, e_false, e_fallthru;
1047   gimple cond_stmt;
1048   gimple_seq stmts = NULL;
1049   tree cost_pre_condition = NULL_TREE;
1050   tree scalar_loop_iters =
1051     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1052
       /* Split the single incoming edge twice: the first split creates
          COND_BB, the second creates THEN_BB between COND_BB and
          BB_BEFORE_FIRST_LOOP.  */
1053   e = single_pred_edge (bb_before_first_loop);
1054   cond_bb = split_edge (e);
1055
1056   e = single_pred_edge (bb_before_first_loop);
1057   then_bb = split_edge (e);
1058   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1059
       /* Add the edge that bypasses THEN_BB; it is taken when the test
          emitted below is false.  */
1060   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1061                                    EDGE_FALSE_VALUE);
1062   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1063
       /* The edge into THEN_BB left by the split becomes the true edge.  */
1064   e_true = EDGE_PRED (then_bb, 0);
1065   e_true->flags &= ~EDGE_FALLTHRU;
1066   e_true->flags |= EDGE_TRUE_VALUE;
1067
       /* Distribute the profile of COND_BB over its two outgoing edges
          and propagate the true edge's share to THEN_BB.  */
1068   e_true->probability = probability;
1069   e_false->probability = inverse_probability (probability);
1070   e_true->count = apply_probability (cond_bb->count, probability);
1071   e_false->count = cond_bb->count - e_true->count;
1072   then_bb->frequency = EDGE_FREQUENCY (e_true);
1073   then_bb->count = e_true->count;
1074
1075   e_fallthru = EDGE_SUCC (then_bb, 0);
1076   e_fallthru->count = then_bb->count;
1077
       /* End COND_BB with the test SCALAR_LOOP_ITERS <= TH.  */
1078   gsi = gsi_last_bb (cond_bb);
1079   cost_pre_condition =
1080     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1081                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1082   cost_pre_condition =
1083     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1084                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
1085   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1086                                            NULL_TREE, NULL_TREE);
1087   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1088
       /* Gimplify SCALAR_LOOP_ITERS; any statements this requires are
          placed at the end of THEN_BB.  */
1089   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1090                         "prologue_after_cost_adjust");
1091   prologue_after_cost_adjust_name =
1092     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1093
1094   gsi = gsi_last_bb (then_bb);
1095   if (stmts)
1096     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1097
       /* Merge the two candidates: the scalar iteration count when
          arriving from THEN_BB, the incoming *FIRST_NITERS otherwise.  */
1098   newphi = create_phi_node (var, bb_before_first_loop);
1099   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1100                UNKNOWN_LOCATION);
1101   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1102
1103   *first_niters = PHI_RESULT (newphi);
1104 }
1105
1106 /* Function slpeel_tree_peel_loop_to_edge.
1107
1108    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1109    that is placed on the entry (exit) edge E of LOOP. After this transformation
1110    we have two loops one after the other - first-loop iterates FIRST_NITERS
1111    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1112    If the cost model indicates that it is profitable to emit a scalar
1113    loop instead of the vector one, then the prolog (epilog) loop will iterate
1114    for the entire unchanged scalar iterations of the loop.
1115
1116    Input:
1117    - LOOP: the loop to be peeled.
1118    - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1119         should be copied.
1120    - E: the exit or entry edge of LOOP.
1121         If it is the entry edge, we peel the first iterations of LOOP. In this
1122         case first-loop is the newly created loop, and second-loop is LOOP.
1123         If it is the exit edge, we peel the last iterations of LOOP. In this
1124         case, first-loop is LOOP, and second-loop is the newly created loop.
1125    - NITERS: the number of iterations that LOOP iterates.
1126    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1127    - UPDATE_FIRST_LOOP_COUNT:  specifies whether this function is responsible
1128         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1129         is false, the caller of this function may want to take care of this
1130         (this can be useful if we don't want new stmts added to first-loop).
             The code below also treats a true value as prologue peeling and
             a false value as epilogue peeling.
1131    - TH: cost model profitability threshold of iterations for vectorization.
1132    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1133                           during versioning and hence needs to occur during
1134                           prologue generation or whether cost model check
1135                           has not occurred during prologue generation and hence
1136                           needs to occur during epilogue generation.
1137    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1138    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1139
1140
1141    Output:
1142    The function returns a pointer to the new loop-copy, or NULL if it failed
1143    to perform the transformation (slpeel_can_duplicate_loop_p rejected LOOP
        and E, or the loop could not be duplicated).
1144
1145    The function generates two if-then-else guards: one before the first loop,
1146    and the other before the second loop:
1147    The first guard is:
1148      for prologue peeling, if (FIRST_NITERS <= 0) then skip the first loop,
1149      and go directly to the second loop;
          for epilogue peeling, if (LOOP_VINFO_NITERSM1 < limit) then skip the
          first loop, where limit is the vectorization factor minus one (plus
          one when peeling for gaps), raised to TH when CHECK_PROFITABILITY is
          set and TH is larger.
1150    The second guard is:
1151      if (FIRST_NITERS == NITERS) then skip the second loop.
1152
1153    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1154    then, for epilogue peeling, the first loop is also skipped when COND_EXPR
1155    is false, and the statements in COND_EXPR_STMT_LIST are emitted together
        with the first guard.
1156
1157    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1158    FORNOW the resulting code will not be in loop-closed-ssa form.
1159 */
1160
1161 static struct loop *
1162 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
1163                                edge e, tree *first_niters,
1164                                tree niters, bool update_first_loop_count,
1165                                unsigned int th, bool check_profitability,
1166                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1167                                int bound1, int bound2)
1168 {
1169   struct loop *new_loop = NULL, *first_loop, *second_loop;
1170   edge skip_e;
1171   tree pre_condition = NULL_TREE;
1172   basic_block bb_before_second_loop, bb_after_second_loop;
1173   basic_block bb_before_first_loop;
1174   basic_block bb_between_loops;
1175   basic_block new_exit_bb;
1176   gimple_stmt_iterator gsi;
1177   edge exit_e = single_exit (loop);
1178   source_location loop_loc;
1179   /* There are many aspects to how likely the first loop is going to be executed.
1180      Without a histogram we can't really do a good job.  Simply set it to
1181      2/3, so the first loop is not reordered to the end of function and
1182      the hot path through stays short.  */
1183   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1184   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1185   int probability_of_second_loop;
1186
       /* Give up before changing anything if LOOP or E is not of the
          supported shape.  */
1187   if (!slpeel_can_duplicate_loop_p (loop, e))
1188     return NULL;
1189
1190   /* We might have a queued need to update virtual SSA form.  As we
1191      delete the update SSA machinery below after doing a regular
1192      incremental SSA update during loop copying make sure we don't
1193      lose that fact.
1194      ???  Needing to update virtual SSA form by renaming is unfortunate
1195      but not all of the vectorizer code inserting new loads / stores
1196      properly assigns virtual operands to those statements.  */
1197   update_ssa (TODO_update_ssa_only_virtuals);
1198
1199   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1200      in the exit bb and rename all the uses after the loop.  This simplifies
1201      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1202      (but normally loop closed SSA form doesn't require virtual PHIs to be
1203      in the same form).  Doing this early simplifies the checking what
1204      uses should be renamed.  */
1205   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1206     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1207       {
1208         gimple phi = gsi_stmt (gsi);
             /* GSI is reused to scan the exit block; that is fine because
                the outer loop is left through the break below.  */
1209         for (gsi = gsi_start_phis (exit_e->dest);
1210              !gsi_end_p (gsi); gsi_next (&gsi))
1211           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1212             break;
1213         if (gsi_end_p (gsi))
1214           {
1215             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1216             gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1217             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1218             imm_use_iterator imm_iter;
1219             gimple stmt;
1220             use_operand_p use_p;
1221
1222             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1223             gimple_phi_set_result (new_phi, new_vop);
                 /* Redirect every use of VOP to NEW_VOP, except the uses in
                    NEW_PHI itself and in statements of the loop header.  */
1224             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1225               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1226                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1227                   SET_USE (use_p, new_vop);
1228           }
             /* Only the first virtual PHI of the header is considered.  */
1229         break;
1230       }
1231
1232   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1233         Resulting CFG would be:
1234
1235         first_loop:
1236         do {
1237         } while ...
1238
1239         second_loop:
1240         do {
1241         } while ...
1242
1243         orig_exit_bb:
1244    */
1245
1246   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1247                                                            e)))
1248     {
1249       loop_loc = find_loop_location (loop);
1250       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1251                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1252       return NULL;
1253     }
1254
1255   if (MAY_HAVE_DEBUG_STMTS)
1256     {
1257       gcc_assert (!adjust_vec.exists ());
1258       adjust_vec.create (32);
1259     }
1260
1261   if (e == exit_e)
1262     {
1263       /* NEW_LOOP was placed after LOOP.  */
1264       first_loop = loop;
1265       second_loop = new_loop;
1266     }
1267   else
1268     {
1269       /* NEW_LOOP was placed before LOOP.  */
1270       first_loop = new_loop;
1271       second_loop = loop;
1272     }
1273
1274   /* 2.  Add the guard code in one of the following ways:
1275
1276      2.a Add the guard that controls whether the first loop is executed.
1277          This occurs when this function is invoked for prologue or epilogue
1278          generation and when the cost model check can be done at compile time.
1279
1280          Resulting CFG would be:
1281
1282          bb_before_first_loop:
1283          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1284                                 GOTO first-loop
1285
1286          first_loop:
1287          do {
1288          } while ...
1289
1290          bb_before_second_loop:
1291
1292          second_loop:
1293          do {
1294          } while ...
1295
1296          orig_exit_bb:
1297
1298      2.b Add the cost model check that allows the prologue
1299          to iterate for the entire unchanged scalar
1300          iterations of the loop in the event that the cost
1301          model indicates that the scalar loop is more
1302          profitable than the vector one. This occurs when
1303          this function is invoked for prologue generation
1304          and the cost model check needs to be done at run
1305          time.
1306
1307          Resulting CFG after prologue peeling would be:
1308
1309          if (scalar_loop_iterations <= th)
1310            FIRST_NITERS = scalar_loop_iterations
1311
1312          bb_before_first_loop:
1313          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1314                                 GOTO first-loop
1315
1316          first_loop:
1317          do {
1318          } while ...
1319
1320          bb_before_second_loop:
1321
1322          second_loop:
1323          do {
1324          } while ...
1325
1326          orig_exit_bb:
1327
1328      2.c Add the cost model check that allows the epilogue
1329          to iterate for the entire unchanged scalar
1330          iterations of the loop in the event that the cost
1331          model indicates that the scalar loop is more
1332          profitable than the vector one. This occurs when
1333          this function is invoked for epilogue generation
1334          and the cost model check needs to be done at run
1335          time.  This check is combined with any pre-existing
1336          check in COND_EXPR to avoid versioning.
1337
1338          Resulting CFG after epilogue peeling would be:
1339
1340          bb_before_first_loop:
1341          if ((scalar_loop_iterations <= th)
1342              ||
1343              FIRST_NITERS == 0) GOTO bb_before_second_loop
1344                                 GOTO first-loop
1345
1346          first_loop:
1347          do {
1348          } while ...
1349
1350          bb_before_second_loop:
1351
1352          second_loop:
1353          do {
1354          } while ...
1355
1356          orig_exit_bb:
1357   */
1358
1359   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1360   /* Loop copying inserted a forwarder block for us here.  */
1361   bb_before_second_loop = single_exit (first_loop)->dest;
1362
1363   probability_of_second_loop = (inverse_probability (first_guard_probability)
1364                                 + combine_probabilities (second_guard_probability,
1365                                                          first_guard_probability));
1366   /* Theoretically preheader edge of first loop and exit edge should have
1367      same frequencies.  Loop exit probabilities are however easy to get wrong.
1368      It is safer to copy value from original loop entry.  */
1369   bb_before_second_loop->frequency
1370      = combine_probabilities (bb_before_first_loop->frequency,
1371                               probability_of_second_loop);
1372   bb_before_second_loop->count
1373      = apply_probability (bb_before_first_loop->count,
1374                           probability_of_second_loop);
1375   single_succ_edge (bb_before_second_loop)->count
1376      = bb_before_second_loop->count;
1377
1378   /* Epilogue peeling.  */
1379   if (!update_first_loop_count)
1380     {
1381       loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1382       tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1383       unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1384       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1385         limit = limit + 1;
1386       if (check_profitability
1387           && th > limit)
1388         limit = th;
           /* Skip the first loop when SCALAR_LOOP_ITERS is below LIMIT.  */
1389       pre_condition =
1390         fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1391                      build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
1392       if (cond_expr)
1393         {
               /* Also skip the first loop when COND_EXPR does not hold.  */
1394           pre_condition =
1395             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1396                          pre_condition,
1397                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1398                                       cond_expr));
1399         }
1400     }
1401
1402   /* Prologue peeling.  */
1403   else
1404     {
1405       if (check_profitability)
1406         set_prologue_iterations (bb_before_first_loop, first_niters,
1407                                  loop, th, first_guard_probability);
1408
           /* Skip the first loop when *FIRST_NITERS <= 0.  */
1409       pre_condition =
1410         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1411                      build_int_cst (TREE_TYPE (*first_niters), 0));
1412     }
1413
1414   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1415                                   cond_expr_stmt_list,
1416                                   bb_before_second_loop, bb_before_first_loop,
1417                                   inverse_probability (first_guard_probability));
1418   scale_loop_profile (first_loop, first_guard_probability,
1419                       check_profitability && (int)th > bound1 ? th : bound1);
1420   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1421                                       first_loop == new_loop,
1422                                       &new_exit_bb);
1423
1424
1425   /* 3. Add the guard that controls whether the second loop is executed.
1426         Resulting CFG would be:
1427
1428         bb_before_first_loop:
1429         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1430                                GOTO first-loop
1431
1432         first_loop:
1433         do {
1434         } while ...
1435
1436         bb_between_loops:
1437         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1438                                     GOTO bb_before_second_loop
1439
1440         bb_before_second_loop:
1441
1442         second_loop:
1443         do {
1444         } while ...
1445
1446         bb_after_second_loop:
1447
1448         orig_exit_bb:
1449    */
1450
       /* NEW_EXIT_BB was passed by address to the guard1 call above and
          now names the block that receives the second guard.  */
1451   bb_between_loops = new_exit_bb;
1452   bb_after_second_loop = split_edge (single_exit (second_loop));
1453
1454   pre_condition =
1455         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1456   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1457                                   bb_after_second_loop, bb_before_first_loop,
1458                                   inverse_probability (second_guard_probability));
1459   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1460   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1461                                      second_loop == new_loop, &new_exit_bb);
1462
1463   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1464    */
1465   if (update_first_loop_count)
1466     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1467
1468   delete_update_ssa ();
1469
1470   adjust_vec_debug_stmts ();
1471
1472   return new_loop;
1473 }
1474
1475 /* Function find_loop_location.
1476
1477    Return a source location that identifies LOOP.  The location of the
1478    statement returned by get_loop_exit_condition is preferred; when that
1479    statement is missing or carries no usable location, the first statement
1480    of the loop header with a usable location is used instead.  Return
1481    UNKNOWN_LOCATION if LOOP is NULL, has no header, or nothing is found.  */
1482
1483 source_location
1484 find_loop_location (struct loop *loop)
1485 {
1486   gimple_stmt_iterator gsi;
1487   gimple exit_cond;
1488
1489   if (loop == NULL)
1490     return UNKNOWN_LOCATION;
1491
1492   /* First choice: the statement controlling the loop exit.  */
1493   exit_cond = get_loop_exit_condition (loop);
1494   if (exit_cond != NULL
1495       && LOCATION_LOCUS (gimple_location (exit_cond)) > BUILTINS_LOCATION)
1496     return gimple_location (exit_cond);
1497
1498   /* Otherwise the loop is probably not "well formed"; estimate its
1499      location from the statements of the header block.  */
1500   if (loop->header == NULL)
1501     return UNKNOWN_LOCATION;
1502
1503   gsi = gsi_start_bb (loop->header);
1504   while (!gsi_end_p (gsi))
1505     {
1506       location_t loc = gimple_location (gsi_stmt (gsi));
1507
1508       if (LOCATION_LOCUS (loc) > BUILTINS_LOCATION)
1509         return loc;
1510       gsi_next (&gsi);
1511     }
1512
1513   return UNKNOWN_LOCATION;
1514 }
1515
1516
1517 /* Function vect_can_advance_ivs_p
1518
1519    In case the number of iterations that LOOP iterates is unknown at compile
1520    time, an epilog loop will be generated, and the loop induction variables
1521    (IVs) will be "advanced" to the value they are supposed to take just before
1522    the epilog loop.  Return true if the evolution recorded for every header
1523    phi of the loop in LOOP_VINFO, other than virtual and reduction phis, is
1524    simple enough for that; otherwise dump the reason and return false.  */
1525
1526 bool
1527 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1528 {
1529   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1530   basic_block bb = loop->header;
1531   gimple phi;
1532   gimple_stmt_iterator gsi;
1533
1534   /* Analyze phi functions of the loop header.  */
1535   if (dump_enabled_p ())
1536     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1537   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1538     {
1539       tree evolution_part;
1540
1541       phi = gsi_stmt (gsi);
1542       if (dump_enabled_p ())
1543         {
1544           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1545           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1546           dump_printf (MSG_NOTE, "\n");
1547         }
1548
1549       /* Skip virtual phi's. The data dependences that are associated with
1550          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1551       if (virtual_operand_p (PHI_RESULT (phi)))
1552         {
1553           if (dump_enabled_p ())
1554             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1555                              "virtual phi. skip.\n");
1556           continue;
1557         }
1558
1559       /* Skip reduction phis.  */
1560       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1561         {
1562           if (dump_enabled_p ())
1563             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1564                              "reduc phi. skip.\n");
1565           continue;
1566         }
1567
1568       /* Analyze the evolution function.  */
1569       evolution_part
1570         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1571       if (evolution_part == NULL_TREE)
1572         {
1573           if (dump_enabled_p ())
1574             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1575                              "No access function or evolution.\n");
1576           return false;
1577         }
1578
1579       /* FORNOW: We do not transform initial conditions of IVs
1580          which evolution functions are a polynomial of degree >= 2.  */
1581       if (tree_is_chrec (evolution_part))
1582         {
1583           if (dump_enabled_p ())
1584             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1585                              "evolution is chrec.\n");
1586           return false;
1587         }
1588     }
1589
1590   return true;
1591 }
1592
1593
1594 /*   Function vect_update_ivs_after_vectorizer.
1595
1596      "Advance" the induction variables of LOOP to the value they should take
1597      after the execution of LOOP.  This is currently necessary because the
1598      vectorizer does not handle induction variables that are used after the
1599      loop.  Such a situation occurs when the last iterations of LOOP are
1600      peeled, because:
1601      1. We introduced new uses after LOOP for IVs that were not originally used
1602         after LOOP: the IVs of LOOP are now used by an epilog loop.
1603      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1604         times, whereas the loop IVs should be bumped N times.
1605
1606      Input:
1607      - LOOP - a loop that is going to be vectorized. The last few iterations
1608               of LOOP were peeled.
1609      - NITERS - the number of iterations that LOOP executes (before it is
1610                 vectorized). i.e, the number of times the ivs should be bumped.
1611      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1612                   coming out from LOOP on which there are uses of the LOOP ivs
1613                   (this is the path from LOOP->exit to epilog_loop->preheader).
1614
1615                   The new definitions of the ivs are placed in LOOP->exit.
1616                   The phi args associated with the edge UPDATE_E in the bb
1617                   UPDATE_E->dest are updated accordingly.
1618
1619      Assumption 1: Like the rest of the vectorizer, this function assumes
1620      a single loop exit that has a single predecessor.
1621
1622      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1623      organized in the same order.
1624
1625      Assumption 3: The access function of the ivs is simple enough (see
1626      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1627
1628      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1629      coming out of LOOP on which the ivs of LOOP are used (this is the path
1630      that leads to the epilog loop; other paths skip the epilog loop).  This
1631      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1632      needs to have its phis updated.
1633  */
1634
1635 static void
1636 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1637                                   edge update_e)
1638 {
1639   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1640   basic_block exit_bb = single_exit (loop)->dest;
1641   gimple phi, phi1;
1642   gimple_stmt_iterator gsi, gsi1;
1643   basic_block update_bb = update_e->dest;
1644
1645   gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1646
1647   /* Make sure there exists a single-predecessor exit bb:  */
1648   gcc_assert (single_pred_p (exit_bb));
1649
1650   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1651        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1652        gsi_next (&gsi), gsi_next (&gsi1))
1653     {
1654       tree init_expr;
1655       tree step_expr, off;
1656       tree type;
1657       tree var, ni, ni_name;
1658       gimple_stmt_iterator last_gsi;
1659       stmt_vec_info stmt_info;
1660
1661       phi = gsi_stmt (gsi);
1662       phi1 = gsi_stmt (gsi1);
1663       if (dump_enabled_p ())
1664         {
1665           dump_printf_loc (MSG_NOTE, vect_location,
1666                            "vect_update_ivs_after_vectorizer: phi: ");
1667           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1668           dump_printf (MSG_NOTE, "\n");
1669         }
1670
1671       /* Skip virtual phi's.  */
1672       if (virtual_operand_p (PHI_RESULT (phi)))
1673         {
1674           if (dump_enabled_p ())
1675             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1676                              "virtual phi. skip.\n");
1677           continue;
1678         }
1679
1680       /* Skip reduction phis.  */
1681       stmt_info = vinfo_for_stmt (phi);
1682       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1683         {
1684           if (dump_enabled_p ())
1685             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686                              "reduc phi. skip.\n");
1687           continue;
1688         }
1689
1690       type = TREE_TYPE (gimple_phi_result (phi));
1691       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1692       step_expr = unshare_expr (step_expr);
1693
1694       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1695          of degree >= 2 or exponential.  */
1696       gcc_assert (!tree_is_chrec (step_expr));
1697
1698       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1699
1700       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1701                          fold_convert (TREE_TYPE (step_expr), niters),
1702                          step_expr);
1703       if (POINTER_TYPE_P (type))
1704         ni = fold_build_pointer_plus (init_expr, off);
1705       else
1706         ni = fold_build2 (PLUS_EXPR, type,
1707                           init_expr, fold_convert (type, off));
1708
1709       var = create_tmp_var (type, "tmp");
1710
1711       last_gsi = gsi_last_bb (exit_bb);
1712       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1713                                           true, GSI_SAME_STMT);
1714
1715       /* Fix phi expressions in the successor bb.  */
1716       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1717     }
1718 }
1719
1720 /* Function vect_do_peeling_for_loop_bound
1721
1722    Peel the last iterations of the loop represented by LOOP_VINFO.
1723    The peeled iterations form a new epilog loop.  Given that the loop now
1724    iterates NITERS times, the new epilog loop iterates
1725    NITERS % VECTORIZATION_FACTOR times.
1726
1727    The original loop will later be made to iterate
1728    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1729
1730    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1731    test.  */
1732
1733 void
1734 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1735                                 tree ni_name, tree ratio_mult_vf_name,
1736                                 unsigned int th, bool check_profitability)
1737 {
1738   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1739   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1740   struct loop *new_loop;
1741   edge update_e;
1742   basic_block preheader;
1743   int loop_num;
1744   int max_iter;
1745   tree cond_expr = NULL_TREE;
1746   gimple_seq cond_expr_stmt_list = NULL;
1747
1748   if (dump_enabled_p ())
1749     dump_printf_loc (MSG_NOTE, vect_location,
1750                      "=== vect_do_peeling_for_loop_bound ===\n");
1751
1752   initialize_original_copy_tables ();
1753
1754   loop_num  = loop->num;
1755
1756   new_loop
1757     = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1758                                      &ratio_mult_vf_name, ni_name, false,
1759                                      th, check_profitability,
1760                                      cond_expr, cond_expr_stmt_list,
1761                                      0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1762   gcc_assert (new_loop);
1763   gcc_assert (loop_num == loop->num);
1764 #ifdef ENABLE_CHECKING
1765   slpeel_verify_cfg_after_peeling (loop, new_loop);
1766 #endif
1767
1768   /* A guard that controls whether the new_loop is to be executed or skipped
1769      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1770      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1771      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1772      is on the path where the LOOP IVs are used and need to be updated.  */
1773
1774   preheader = loop_preheader_edge (new_loop)->src;
1775   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1776     update_e = EDGE_PRED (preheader, 0);
1777   else
1778     update_e = EDGE_PRED (preheader, 1);
1779
1780   /* Update IVs of original loop as if they were advanced
1781      by ratio_mult_vf_name steps.  */
1782   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1783
1784   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1785      and this means N-2 loopback edge executions.
1786
1787      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1788      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1789   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1790               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1791               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1792   if (check_profitability)
1793     max_iter = MAX (max_iter, (int) th - 1);
1794   record_niter_bound (new_loop, max_iter, false, true);
1795   dump_printf (MSG_NOTE,
1796                "Setting upper bound of nb iterations for epilogue "
1797                "loop to %d\n", max_iter);
1798
1799   /* After peeling we have to reset scalar evolution analyzer.  */
1800   scev_reset ();
1801
1802   free_original_copy_tables ();
1803 }
1804
1805
1806 /* Function vect_gen_niters_for_prolog_loop
1807
1808    Set the number of iterations for the loop represented by LOOP_VINFO
1809    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1810    and the misalignment of DR - the data reference recorded in
1811    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1812    this loop, the data reference DR will refer to an aligned location.
1813
1814    The following computation is generated:
1815
1816    If the misalignment of DR is known at compile time:
1817      addr_mis = int mis = DR_MISALIGNMENT (dr);
1818    Else, compute address misalignment in bytes:
1819      addr_mis = addr & (vectype_align - 1)
1820
1821    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1822
1823    (elem_size = element type size; an element is the scalar element whose type
1824    is the inner type of the vectype)
1825
1826    When the step of the data-ref in the loop is not 1 (as in interleaved data
1827    and SLP), the number of iterations of the prolog must be divided by the step
1828    (which is equal to the size of interleaved group).
1829
1830    The above formulas assume that VF == number of elements in the vector. This
1831    may not hold when there are multiple-types in the loop.
1832    In this case, for some data-references in the loop the VF does not represent
1833    the number of elements that fit in the vector.  Therefore, instead of VF we
1834    use TYPE_VECTOR_SUBPARTS.  */
1835
1836 static tree
1837 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1838 {
1839   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1840   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1841   tree var;
1842   gimple_seq stmts;
1843   tree iters, iters_name;
1844   edge pe;
1845   basic_block new_bb;
1846   gimple dr_stmt = DR_STMT (dr);
1847   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1848   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1849   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1850   tree niters_type = TREE_TYPE (loop_niters);
1851   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1852
1853   pe = loop_preheader_edge (loop);
1854
1855   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1856     {
1857       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1858
1859       if (dump_enabled_p ())
1860         dump_printf_loc (MSG_NOTE, vect_location,
1861                          "known peeling = %d.\n", npeel);
1862
1863       iters = build_int_cst (niters_type, npeel);
1864       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1865     }
1866   else
1867     {
1868       gimple_seq new_stmts = NULL;
1869       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1870       tree offset = negative
1871           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1872       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1873                                                 &new_stmts, offset, loop);
1874       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1875       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1876       HOST_WIDE_INT elem_size =
1877                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1878       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1879       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1880       tree nelements_tree = build_int_cst (type, nelements);
1881       tree byte_misalign;
1882       tree elem_misalign;
1883
1884       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1885       gcc_assert (!new_bb);
1886
1887       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1888       byte_misalign =
1889         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1890                      vectype_align_minus_1);
1891
1892       /* Create:  elem_misalign = byte_misalign / element_size  */
1893       elem_misalign =
1894         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1895
1896       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1897       if (negative)
1898         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1899       else
1900         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1901       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1902       iters = fold_convert (niters_type, iters);
1903       *bound = nelements;
1904     }
1905
1906   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1907   /* If the loop bound is known at compile time we already verified that it is
1908      greater than vf; since the misalignment ('iters') is at most vf, there's
1909      no need to generate the MIN_EXPR in this case.  */
1910   if (TREE_CODE (loop_niters) != INTEGER_CST)
1911     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1912
1913   if (dump_enabled_p ())
1914     {
1915       dump_printf_loc (MSG_NOTE, vect_location,
1916                        "niters for prolog loop: ");
1917       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1918       dump_printf (MSG_NOTE, "\n");
1919     }
1920
1921   var = create_tmp_var (niters_type, "prolog_loop_niters");
1922   stmts = NULL;
1923   iters_name = force_gimple_operand (iters, &stmts, false, var);
1924
1925   /* Insert stmt on loop preheader edge.  */
1926   if (stmts)
1927     {
1928       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1929       gcc_assert (!new_bb);
1930     }
1931
1932   return iters_name;
1933 }
1934
1935
1936 /* Function vect_update_init_of_dr
1937
1938    NITERS iterations were peeled from LOOP.  DR represents a data reference
1939    in LOOP.  This function updates the information recorded in DR to
1940    account for the fact that the first NITERS iterations had already been
1941    executed.  Specifically, it updates the OFFSET field of DR.  */
1942
1943 static void
1944 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1945 {
1946   tree offset = DR_OFFSET (dr);
1947
1948   niters = fold_build2 (MULT_EXPR, sizetype,
1949                         fold_convert (sizetype, niters),
1950                         fold_convert (sizetype, DR_STEP (dr)));
1951   offset = fold_build2 (PLUS_EXPR, sizetype,
1952                         fold_convert (sizetype, offset), niters);
1953   DR_OFFSET (dr) = offset;
1954 }
1955
1956
1957 /* Function vect_update_inits_of_drs
1958
1959    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1960    This function updates the information recorded for the data references in
1961    the loop to account for the fact that the first NITERS iterations had
1962    already been executed.  Specifically, it updates the initial_condition of
1963    the access_function of all the data_references in the loop.  */
1964
1965 static void
1966 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1967 {
1968   unsigned int i;
1969   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1970   struct data_reference *dr;
1971
1972  if (dump_enabled_p ())
1973     dump_printf_loc (MSG_NOTE, vect_location,
1974                      "=== vect_update_inits_of_dr ===\n");
1975
1976   FOR_EACH_VEC_ELT (datarefs, i, dr)
1977     vect_update_init_of_dr (dr, niters);
1978 }
1979
1980
1981 /* Function vect_do_peeling_for_alignment
1982
1983    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1984    'niters' is set to the misalignment of one of the data references in the
1985    loop, thereby forcing it to refer to an aligned location at the beginning
1986    of the execution of this loop.  The data reference for which we are
1987    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
1988
1989 void
1990 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
1991                                unsigned int th, bool check_profitability)
1992 {
1993   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1994   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1995   tree niters_of_prolog_loop;
1996   tree wide_prolog_niters;
1997   struct loop *new_loop;
1998   int max_iter;
1999   int bound = 0;
2000
2001   if (dump_enabled_p ())
2002     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2003                      "loop peeled for vectorization to enhance"
2004                      " alignment\n");
2005
2006   initialize_original_copy_tables ();
2007
2008   gimple_seq stmts = NULL;
2009   gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2010   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2011                                                            ni_name,
2012                                                            &bound);
2013
2014   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2015   new_loop =
2016     slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2017                                    loop_preheader_edge (loop),
2018                                    &niters_of_prolog_loop, ni_name, true,
2019                                    th, check_profitability, NULL_TREE, NULL,
2020                                    bound, 0);
2021
2022   gcc_assert (new_loop);
2023 #ifdef ENABLE_CHECKING
2024   slpeel_verify_cfg_after_peeling (new_loop, loop);
2025 #endif
2026   /* For vectorization factor N, we need to copy at most N-1 values
2027      for alignment and this means N-2 loopback edge executions.  */
2028   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2029   if (check_profitability)
2030     max_iter = MAX (max_iter, (int) th - 1);
2031   record_niter_bound (new_loop, max_iter, false, true);
2032   dump_printf (MSG_NOTE,
2033                "Setting upper bound of nb iterations for prologue "
2034                "loop to %d\n", max_iter);
2035
2036   /* Update number of times loop executes.  */
2037   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2038                 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
2039   LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2040                 TREE_TYPE (ni_name),
2041                 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
2042
2043   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2044     wide_prolog_niters = niters_of_prolog_loop;
2045   else
2046     {
2047       gimple_seq seq = NULL;
2048       edge pe = loop_preheader_edge (loop);
2049       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2050       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2051       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2052                                                  var);
2053       if (seq)
2054         {
2055           /* Insert stmt on loop preheader edge.  */
2056           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2057           gcc_assert (!new_bb);
2058         }
2059     }
2060
2061   /* Update the init conditions of the access functions of all data refs.  */
2062   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2063
2064   /* After peeling we have to reset scalar evolution analyzer.  */
2065   scev_reset ();
2066
2067   free_original_copy_tables ();
2068 }
2069
2070
2071 /* Function vect_create_cond_for_align_checks.
2072
2073    Create a conditional expression that represents the alignment checks for
2074    all of data references (array element references) whose alignment must be
2075    checked at runtime.
2076
2077    Input:
2078    COND_EXPR  - input conditional expression.  New conditions will be chained
2079                 with logical AND operation.
2080    LOOP_VINFO - two fields of the loop information are used.
2081                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2082                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2083
2084    Output:
2085    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2086                          expression.
2087    The returned value is the conditional expression to be used in the if
2088    statement that controls which version of the loop gets executed at runtime.
2089
2090    The algorithm makes two assumptions:
2091      1) The number of bytes "n" in a vector is a power of 2.
2092      2) An address "a" is aligned if a%n is zero and that this
2093         test can be done as a&(n-1) == 0.  For example, for 16
2094         byte vectors the test is a&0xf == 0.  */
2095
2096 static void
2097 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2098                                    tree *cond_expr,
2099                                    gimple_seq *cond_expr_stmt_list)
2100 {
2101   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2102   vec<gimple> may_misalign_stmts
2103     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2104   gimple ref_stmt;
2105   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2106   tree mask_cst;
2107   unsigned int i;
2108   tree int_ptrsize_type;
2109   char tmp_name[20];
2110   tree or_tmp_name = NULL_TREE;
2111   tree and_tmp_name;
2112   gimple and_stmt;
2113   tree ptrsize_zero;
2114   tree part_cond_expr;
2115
2116   /* Check that mask is one less than a power of 2, i.e., mask is
2117      all zeros followed by all ones.  */
2118   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2119
2120   int_ptrsize_type = signed_type_for (ptr_type_node);
2121
2122   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2123      of the first vector of the i'th data reference. */
2124
2125   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2126     {
2127       gimple_seq new_stmt_list = NULL;
2128       tree addr_base;
2129       tree addr_tmp_name;
2130       tree new_or_tmp_name;
2131       gimple addr_stmt, or_stmt;
2132       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2133       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2134       bool negative = tree_int_cst_compare
2135         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2136       tree offset = negative
2137         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2138
2139       /* create: addr_tmp = (int)(address_of_first_vector) */
2140       addr_base =
2141         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2142                                               offset, loop);
2143       if (new_stmt_list != NULL)
2144         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2145
2146       sprintf (tmp_name, "addr2int%d", i);
2147       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2148       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2149                                                 addr_base, NULL_TREE);
2150       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2151
2152       /* The addresses are OR together.  */
2153
2154       if (or_tmp_name != NULL_TREE)
2155         {
2156           /* create: or_tmp = or_tmp | addr_tmp */
2157           sprintf (tmp_name, "orptrs%d", i);
2158           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2159           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2160                                                   new_or_tmp_name,
2161                                                   or_tmp_name, addr_tmp_name);
2162           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2163           or_tmp_name = new_or_tmp_name;
2164         }
2165       else
2166         or_tmp_name = addr_tmp_name;
2167
2168     } /* end for i */
2169
2170   mask_cst = build_int_cst (int_ptrsize_type, mask);
2171
2172   /* create: and_tmp = or_tmp & mask  */
2173   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2174
2175   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2176                                            or_tmp_name, mask_cst);
2177   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2178
2179   /* Make and_tmp the left operand of the conditional test against zero.
2180      if and_tmp has a nonzero bit then some address is unaligned.  */
2181   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2182   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2183                                 and_tmp_name, ptrsize_zero);
2184   if (*cond_expr)
2185     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2186                               *cond_expr, part_cond_expr);
2187   else
2188     *cond_expr = part_cond_expr;
2189 }
2190
2191 /* Function vect_create_cond_for_alias_checks.
2192
2193    Create a conditional expression that represents the run-time checks for
2194    overlapping of address ranges represented by a list of data references
2195    relations passed as input.
2196
2197    Input:
2198    COND_EXPR  - input conditional expression.  New conditions will be chained
2199                 with logical AND operation.  If it is NULL, then the function
2200                 is used to return the number of alias checks.
2201    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2202                 to be checked.
2203
2204    Output:
2205    COND_EXPR - conditional expression.
2206
2207    The returned COND_EXPR is the conditional expression to be used in the if
2208    statement that controls which version of the loop gets executed at runtime.
2209 */
2210
2211 void
2212 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2213 {
2214   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2215     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2216   tree part_cond_expr;
2217
2218   /* Create expression
2219      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2220      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2221      &&
2222      ...
2223      &&
2224      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2225      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2226
2227   if (comp_alias_ddrs.is_empty ())
2228     return;
2229
2230   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2231     {
2232       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2233       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2234       tree segment_length_a = dr_a.seg_len;
2235       tree segment_length_b = dr_b.seg_len;
2236
2237       tree addr_base_a
2238         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2239       tree addr_base_b
2240         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2241
2242       if (dump_enabled_p ())
2243         {
2244           dump_printf_loc (MSG_NOTE, vect_location,
2245                            "create runtime check for data references ");
2246           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2247           dump_printf (MSG_NOTE, " and ");
2248           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2249           dump_printf (MSG_NOTE, "\n");
2250         }
2251
2252       tree seg_a_min = addr_base_a;
2253       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2254       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2255          bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2256          [a, a+12) */
2257       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2258         {
2259           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2260           seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2261           seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2262         }
2263
2264       tree seg_b_min = addr_base_b;
2265       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2266       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2267         {
2268           tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2269           seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2270           seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2271         }
2272
2273       part_cond_expr =
2274         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2275           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2276           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2277
2278       if (*cond_expr)
2279         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2280                                   *cond_expr, part_cond_expr);
2281       else
2282         *cond_expr = part_cond_expr;
2283     }
2284
2285   if (dump_enabled_p ())
2286     dump_printf_loc (MSG_NOTE, vect_location,
2287                      "created %u versioning for alias checks.\n",
2288                      comp_alias_ddrs.length ());
2289
2290   comp_alias_ddrs.release ();
2291 }
2292
2293
2294 /* Function vect_loop_versioning.
2295
2296    If the loop has data references that may or may not be aligned or/and
2297    has data reference relations whose independence was not proven then
2298    two versions of the loop need to be generated, one which is vectorized
2299    and one which isn't.  A test is then generated to control which of the
2300    loops is executed.  The test checks for the alignment of all of the
2301    data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
2303    independence was not proven.  The vectorized version of loop is
2304    executed only if both alias and alignment tests are passed.
2305
2306    The test generated to check which version of loop is executed
2307    is modified to also check for profitability as indicated by the
2308    cost model initially.
2309
2310    The versioning precondition(s) are placed in *COND_EXPR and
2311    *COND_EXPR_STMT_LIST.  */
2312
2313 void
2314 vect_loop_versioning (loop_vec_info loop_vinfo,
2315                       unsigned int th, bool check_profitability)
2316 {
2317   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2318   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2319   basic_block condition_bb;
2320   gimple_stmt_iterator gsi, cond_exp_gsi;
2321   basic_block merge_bb;
2322   basic_block new_exit_bb;
2323   edge new_exit_e, e;
2324   gimple orig_phi, new_phi;
2325   tree cond_expr = NULL_TREE;
2326   gimple_seq cond_expr_stmt_list = NULL;
2327   tree arg;
2328   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2329   gimple_seq gimplify_stmt_list = NULL;
2330   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2331   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2332   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2333
2334   if (check_profitability)
2335     {
2336       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2337                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2338       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2339                                           is_gimple_condexpr, NULL_TREE);
2340     }
2341
2342   if (version_align)
2343     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2344                                        &cond_expr_stmt_list);
2345
2346   if (version_alias)
2347     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2348
2349   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2350                                       is_gimple_condexpr, NULL_TREE);
2351   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2352
2353   initialize_original_copy_tables ();
2354   if (scalar_loop)
2355     {
2356       edge scalar_e;
2357       basic_block preheader, scalar_preheader;
2358
2359       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2360          scale LOOP's frequencies instead.  */
2361       loop_version (scalar_loop, cond_expr, &condition_bb,
2362                     prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2363       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2364       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2365          while we need to move it above LOOP's preheader.  */
2366       e = loop_preheader_edge (loop);
2367       scalar_e = loop_preheader_edge (scalar_loop);
2368       gcc_assert (empty_block_p (e->src)
2369                   && single_pred_p (e->src));
2370       gcc_assert (empty_block_p (scalar_e->src)
2371                   && single_pred_p (scalar_e->src));
2372       gcc_assert (single_pred_p (condition_bb));
2373       preheader = e->src;
2374       scalar_preheader = scalar_e->src;
2375       scalar_e = find_edge (condition_bb, scalar_preheader);
2376       e = single_pred_edge (preheader);
2377       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2378                                       scalar_preheader);
2379       redirect_edge_and_branch_force (scalar_e, preheader);
2380       redirect_edge_and_branch_force (e, condition_bb);
2381       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2382                                single_pred (condition_bb));
2383       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2384                                single_pred (scalar_preheader));
2385       set_immediate_dominator (CDI_DOMINATORS, preheader,
2386                                condition_bb);
2387     }
2388   else
2389     loop_version (loop, cond_expr, &condition_bb,
2390                   prob, prob, REG_BR_PROB_BASE - prob, true);
2391
2392   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2393       && dump_enabled_p ())
2394     {
2395       if (version_alias)
2396         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2397                          "loop versioned for vectorization because of "
2398                          "possible aliasing\n");
2399       if (version_align)
2400         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2401                          "loop versioned for vectorization to enhance "
2402                          "alignment\n");
2403
2404     }
2405   free_original_copy_tables ();
2406
2407   /* Loop versioning violates an assumption we try to maintain during
2408      vectorization - that the loop exit block has a single predecessor.
2409      After versioning, the exit block of both loop versions is the same
2410      basic block (i.e. it has two predecessors). Just in order to simplify
2411      following transformations in the vectorizer, we fix this situation
2412      here by adding a new (empty) block on the exit-edge of the loop,
2413      with the proper loop-exit phis to maintain loop-closed-form.
2414      If loop versioning wasn't done from loop, but scalar_loop instead,
2415      merge_bb will have already just a single successor.  */
2416
2417   merge_bb = single_exit (loop)->dest;
2418   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2419     {
2420       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2421       new_exit_bb = split_edge (single_exit (loop));
2422       new_exit_e = single_exit (loop);
2423       e = EDGE_SUCC (new_exit_bb, 0);
2424
2425       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2426         {
2427           tree new_res;
2428           orig_phi = gsi_stmt (gsi);
2429           new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2430           new_phi = create_phi_node (new_res, new_exit_bb);
2431           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2432           add_phi_arg (new_phi, arg, new_exit_e,
2433                        gimple_phi_arg_location_from_edge (orig_phi, e));
2434           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2435         }
2436     }
2437
2438   /* End loop-exit-fixes after versioning.  */
2439
2440   if (cond_expr_stmt_list)
2441     {
2442       cond_exp_gsi = gsi_last_bb (condition_bb);
2443       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2444                              GSI_SAME_STMT);
2445     }
2446   update_ssa (TODO_update_ssa);
2447 }