gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "gimple.h"
  32 #include "gimplify.h"
  33 #include "gimple-iterator.h"
  34 #include "gimplify-me.h"
  35 #include "gimple-ssa.h"
  36 #include "tree-cfg.h"
  37 #include "tree-phinodes.h"
  38 #include "ssa-iterators.h"
  39 #include "tree-ssanames.h"
  40 #include "tree-ssa-loop-manip.h"
  41 #include "tree-into-ssa.h"
  42 #include "tree-ssa.h"
  43 #include "tree-pass.h"
  44 #include "cfgloop.h"
  45 #include "diagnostic-core.h"
  46 #include "tree-scalar-evolution.h"
  47 #include "tree-vectorizer.h"
  48 #include "langhooks.h"
  49
  50 /*************************************************************************
  51   Simple Loop Peeling Utilities
  52
  53   Utilities to support loop peeling for vectorization purposes.
  54  *************************************************************************/
  55
  56
  57 /* Renames the use *OP_P.  */
  58
  59 static void
  60 rename_use_op (use_operand_p op_p)
  61 {
  62   tree new_name;
  63
  64   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  65     return;
  66
  67   new_name = get_current_def (USE_FROM_PTR (op_p));
  68
  69   /* Something defined outside of the loop.  */
  70   if (!new_name)
  71     return;
  72
  73   /* An ordinary ssa name defined in the loop.  */
  74
  75   SET_USE (op_p, new_name);
  76 }
  77
  78
  79 /* Renames the variables in basic block BB.  */
  80
  81 static void
  82 rename_variables_in_bb (basic_block bb)
  83 {
  84   gimple_stmt_iterator gsi;
  85   gimple stmt;
  86   use_operand_p use_p;
  87   ssa_op_iter iter;
  88   edge e;
  89   edge_iterator ei;
  90   struct loop *loop = bb->loop_father;
  91
  92   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  93     {
  94       stmt = gsi_stmt (gsi);
  95       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  96         rename_use_op (use_p);
  97     }
  98
  99   FOR_EACH_EDGE (e, ei, bb->preds)
 100     {
 101       if (!flow_bb_inside_loop_p (loop, e->src))
 102         continue;
 103       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 104         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
 105     }
 106 }
 107
 108
  109 typedef struct
  110 {
        /* FROM is the SSA name whose debug uses get adjusted and TO its
           replacement; adjust_debug_stmts_now resets the debug bind
           instead when TO is NULL.  */
  111   tree from, to;
        /* Only debug uses located in BB or dominated by it are adjusted.  */
  112   basic_block bb;
  113 } adjust_info;
 114
  115 /* A stack of values to be adjusted in debug stmts.  We have to
  116    process them LIFO, so that the closest substitution applies.  If we
  117    processed them FIFO, without the stack, we might substitute uses
  118    with a PHI DEF that would soon become non-dominant, and when we got
  119    to the suitable one, it wouldn't have anything to substitute any
  120    more.

         While this vector exists, adjust_debug_stmts queues its requests
         here and adjust_vec_debug_stmts later drains and releases it.
         While it does not exist, requests are applied immediately.  */
  121 static vec<adjust_info, va_heap> adjust_vec;
 122
 123 /* Adjust any debug stmts that referenced AI->from values to use the
 124    loop-closed AI->to, if the references are dominated by AI->bb and
 125    not by the definition of AI->from.  */
 126
  127 static void
  128 adjust_debug_stmts_now (adjust_info *ai)
  129 {
  130   basic_block bbphi = ai->bb;
  131   tree orig_def = ai->from;
  132   tree new_def = ai->to;
  133   imm_use_iterator imm_iter;
  134   gimple stmt;
        /* Block holding the statement that defines AI->from.  */
  135   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
  136
        /* The dominance queries below need dominator information.  */
  137   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
  138
  139   /* Adjust any debug stmts that held onto non-loop-closed
  140      references.  */
  141   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
  142     {
  143       use_operand_p use_p;
  144       basic_block bbuse;
  145
            /* Non-debug uses of ORIG_DEF are never touched here.  */
  146       if (!is_gimple_debug (stmt))
  147         continue;
  148
  149       gcc_assert (gimple_debug_bind_p (stmt));
  150
  151       bbuse = gimple_bb (stmt);
  152
            /* Act only on uses located in, or dominated by, the PHI block
               that are neither in nor dominated by the defining block.  */
  153       if ((bbuse == bbphi
  154            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
  155           && !(bbuse == bbdef
  156                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
  157         {
                /* With a replacement available, rewrite every use of
                   ORIG_DEF on this statement; without one, reset the
                   value of the debug bind.  */
  158           if (new_def)
  159             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
  160               SET_USE (use_p, new_def);
  161           else
  162             {
  163               gimple_debug_bind_reset_value (stmt);
  164               update_stmt (stmt);
  165             }
  166         }
  167     }
  168 }
 169
 170 /* Adjust debug stmts as scheduled before.  */
 171
 172 static void
 173 adjust_vec_debug_stmts (void)
 174 {
 175   if (!MAY_HAVE_DEBUG_STMTS)
 176     return;
 177
 178   gcc_assert (adjust_vec.exists ());
 179
 180   while (!adjust_vec.is_empty ())
 181     {
 182       adjust_debug_stmts_now (&adjust_vec.last ());
 183       adjust_vec.pop ();
 184     }
 185
 186   adjust_vec.release ();
 187 }
 188
 189 /* Adjust any debug stmts that referenced FROM values to use the
 190    loop-closed TO, if the references are dominated by BB and not by
 191    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 192    will be postponed until adjust_vec_debug_stmts is called.  */
 193
 194 static void
 195 adjust_debug_stmts (tree from, tree to, basic_block bb)
 196 {
 197   adjust_info ai;
 198
 199   if (MAY_HAVE_DEBUG_STMTS
 200       && TREE_CODE (from) == SSA_NAME
 201       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 202       && ! virtual_operand_p (from))
 203     {
 204       ai.from = from;
 205       ai.to = to;
 206       ai.bb = bb;
 207
 208       if (adjust_vec.exists ())
 209         adjust_vec.safe_push (ai);
 210       else
 211         adjust_debug_stmts_now (&ai);
 212     }
 213 }
 214
 215 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 216    to adjust any debug stmts that referenced the old phi arg,
 217    presumably non-loop-closed references left over from other
 218    transformations.  */
 219
 220 static void
 221 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 222 {
 223   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 224
 225   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 226
 227   if (MAY_HAVE_DEBUG_STMTS)
 228     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 229                         gimple_bb (update_phi));
 230 }
 231
 232
 233 /* Update PHI nodes for a guard of the LOOP.
 234
 235    Input:
 236    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 237         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 238         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 239         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 240         We denote this bb NEW_MERGE_BB because before the guard code was added
 241         it had a single predecessor (the LOOP header), and now it became a merge
 242         point of two paths - the path that ends with the LOOP exit-edge, and
 243         the path that ends with GUARD_EDGE.
 244    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 245         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 246
 247    ===> The CFG before the guard-code was added:
 248         LOOP_header_bb:
 249           loop_body
 250           if (exit_loop) goto update_bb
 251           else           goto LOOP_header_bb
 252         update_bb:
 253
 254    ==> The CFG after the guard-code was added:
 255         guard_bb:
 256           if (LOOP_guard_condition) goto new_merge_bb
 257           else                      goto LOOP_header_bb
 258         LOOP_header_bb:
 259           loop_body
 260           if (exit_loop_condition) goto new_merge_bb
 261           else                     goto LOOP_header_bb
 262         new_merge_bb:
 263           goto update_bb
 264         update_bb:
 265
 266    ==> The CFG after this function:
 267         guard_bb:
 268           if (LOOP_guard_condition) goto new_merge_bb
 269           else                      goto LOOP_header_bb
 270         LOOP_header_bb:
 271           loop_body
 272           if (exit_loop_condition) goto new_exit_bb
 273           else                     goto LOOP_header_bb
 274         new_exit_bb:
 275         new_merge_bb:
 276           goto update_bb
 277         update_bb:
 278
 279    This function:
 280    1. creates and updates the relevant phi nodes to account for the new
 281       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 282       1.1. Create phi nodes at NEW_MERGE_BB.
 283       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
  284            UPDATE_BB), the exit-bb of LOOP before NEW_MERGE_BB was added.
 285    2. preserves loop-closed-ssa-form by creating the required phi nodes
 286       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 287
 288    There are two flavors to this function:
 289
 290    slpeel_update_phi_nodes_for_guard1:
 291      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 292      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 293      for variables that have phis in the loop header.
 294
 295    slpeel_update_phi_nodes_for_guard2:
 296      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 297      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 298      for variables that have phis in the loop exit.
 299
 300    I.E., the overall structure is:
 301
 302         loop1_preheader_bb:
 303                 guard1 (goto loop1/merge1_bb)
 304         loop1
 305         loop1_exit_bb:
 306                 guard2 (goto merge1_bb/merge2_bb)
 307         merge1_bb
 308         loop2
 309         loop2_exit_bb
 310         merge2_bb
 311         next_bb
 312
 313    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 314    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 315    that have phis in loop1->header).
 316
 317    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 318    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 319    that have phis in next_bb). It also adds some of these phis to
 320    loop1_exit_bb.
 321
 322    slpeel_update_phi_nodes_for_guard1 is always called before
 323    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 324    to create correct data-flow and loop-closed-ssa-form.
 325
 326    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 327    that change between iterations of a loop (and therefore have a phi-node
 328    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 329    phis for variables that are used out of the loop (and therefore have
 330    loop-closed exit phis). Some variables may be both updated between
 331    iterations and used after the loop. This is why in loop1_exit_bb we
 332    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 333    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 334
 335    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 336      an original loop. i.e., we have:
 337
 338            orig_loop
 339            guard_bb (goto LOOP/new_merge)
 340            new_loop <-- LOOP
 341            new_exit
 342            new_merge
 343            next_bb
 344
 345      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 346      have:
 347
 348            new_loop
 349            guard_bb (goto LOOP/new_merge)
 350            orig_loop <-- LOOP
 351            new_exit
 352            new_merge
 353            next_bb
 354
 355      The SSA names defined in the original loop have a current
  356      reaching definition that records the corresponding new
 357      ssa-name used in the new duplicated loop copy.
 358   */
 359
 360 /* Function slpeel_update_phi_nodes_for_guard1
 361
 362    Input:
 363    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
  364    - The new names created here are recorded with set_current_def;
  365      there is no DEFS bitmap parameter.
 366
 367    In the context of the overall structure, we have:
 368
 369         loop1_preheader_bb:
 370                 guard1 (goto loop1/merge1_bb)
 371 LOOP->  loop1
 372         loop1_exit_bb:
 373                 guard2 (goto merge1_bb/merge2_bb)
 374         merge1_bb
 375         loop2
 376         loop2_exit_bb
 377         merge2_bb
 378         next_bb
 379
 380    For each name updated between loop iterations (i.e - for each name that has
 381    an entry (loop-header) phi in LOOP) we create a new phi in:
 382    1. merge1_bb (to account for the edge from guard1)
 383    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 384 */
 385
  386 static void
  387 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
  388                                     bool is_new_loop, basic_block *new_exit_bb)
  389 {
  390   gimple orig_phi, new_phi;
  391   gimple update_phi, update_phi2;
  392   tree guard_arg, loop_arg;
  393   basic_block new_merge_bb = guard_edge->dest;
        /* First successor edge of NEW_MERGE_BB; its destination is
           UPDATE_BB.  */
  394   edge e = EDGE_SUCC (new_merge_bb, 0);
  395   basic_block update_bb = e->dest;
  396   basic_block orig_bb = loop->header;
  397   edge new_exit_e;
  398   tree current_new_name;
  399   gimple_stmt_iterator gsi_orig, gsi_update;
  400
  401   /* Create new bb between loop and new_merge_bb.  */
  402   *new_exit_bb = split_edge (single_exit (loop));
  403
  404   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  405
        /* Walk the PHIs of the loop header and those of UPDATE_BB in
           lockstep, stopping as soon as either sequence is exhausted.
           NOTE(review): the PHIs are paired purely by position; the
           assert in 1.3 checks that each pair refers to the same
           values.  */
  406   for (gsi_orig = gsi_start_phis (orig_bb),
  407        gsi_update = gsi_start_phis (update_bb);
  408        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
  409        gsi_next (&gsi_orig), gsi_next (&gsi_update))
  410     {
  411       source_location loop_locus, guard_locus;
  412       tree new_res;
  413       orig_phi = gsi_stmt (gsi_orig);
  414       update_phi = gsi_stmt (gsi_update);
  415
  416       /** 1. Handle new-merge-point phis  **/
  417
  418       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
  419       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
  420       new_phi = create_phi_node (new_res, new_merge_bb);
  421
  422       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
  423             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
            /* LOOP_ARG is the header PHI's argument on the first successor
               edge of the latch, GUARD_ARG the one on the preheader edge.  */
  424       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
  425       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
  426                                                       EDGE_SUCC (loop->latch,
  427                                                                  0));
  428       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
  429       guard_locus
  430         = gimple_phi_arg_location_from_edge (orig_phi,
  431                                              loop_preheader_edge (loop));
  432
  433       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
  434       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
  435
  436       /* 1.3. Update phi in successor block.  */
  437       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
  438                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
  439       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
            /* Keep the merge PHI at hand: step 2.3 redirects its argument
               on NEW_EXIT_E.  */
  440       update_phi2 = new_phi;
  441
  442
  443       /** 2. Handle loop-closed-ssa-form phis  **/
  444
            /* No exit PHI is created for virtual operands.  */
  445       if (virtual_operand_p (PHI_RESULT (orig_phi)))
  446         continue;
  447
  448       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
  449       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
  450       new_phi = create_phi_node (new_res, *new_exit_bb);
  451
  452       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
  453       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
  454
  455       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
  456       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
  457       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
  458                                   PHI_RESULT (new_phi));
  459
  460       /* 2.4. Record the newly created name with set_current_def.
  461          We want to find a name such that
  462                 name = get_current_def (orig_loop_name)
  463          and to set its current definition as follows:
  464                 set_current_def (name, new_phi_name)
  465
  466          If LOOP is a new loop then loop_arg is already the name we're
  467          looking for. If LOOP is the original loop, then loop_arg is
  468          the orig_loop_name and the relevant name is recorded in its
  469          current reaching definition.  */
  470       if (is_new_loop)
  471         current_new_name = loop_arg;
  472       else
  473         {
  474           current_new_name = get_current_def (loop_arg);
  475           /* current_def is not available only if the variable does not
  476              change inside the loop, in which case we also don't care
  477              about recording a current_def for it because we won't be
  478              trying to create loop-exit-phis for it.  */
  479           if (!current_new_name)
  480             continue;
  481         }
            /* The chosen name must not have a current definition yet.  */
  482       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
  483
  484       set_current_def (current_new_name, PHI_RESULT (new_phi));
  485     }
  486 }
 487
 488
 489 /* Function slpeel_update_phi_nodes_for_guard2
 490
 491    Input:
 492    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 493
 494    In the context of the overall structure, we have:
 495
 496         loop1_preheader_bb:
 497                 guard1 (goto loop1/merge1_bb)
 498         loop1
 499         loop1_exit_bb:
 500                 guard2 (goto merge1_bb/merge2_bb)
 501         merge1_bb
 502 LOOP->  loop2
 503         loop2_exit_bb
 504         merge2_bb
 505         next_bb
 506
  507    For each name used outside the loop (i.e - for each name that has an exit
 508    phi in next_bb) we create a new phi in:
 509    1. merge2_bb (to account for the edge from guard_bb)
 510    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 511    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
  512       if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
 513 */
 514
  515 static void
  516 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
  517                                     bool is_new_loop, basic_block *new_exit_bb)
  518 {
  519   gimple orig_phi, new_phi;
  520   gimple update_phi, update_phi2;
  521   tree guard_arg, loop_arg;
  522   basic_block new_merge_bb = guard_edge->dest;
        /* First successor edge of NEW_MERGE_BB; its destination is
           UPDATE_BB.  */
  523   edge e = EDGE_SUCC (new_merge_bb, 0);
  524   basic_block update_bb = e->dest;
  525   edge new_exit_e;
  526   tree orig_def, orig_def_new_name;
  527   tree new_name, new_name2;
  528   tree arg;
  529   gimple_stmt_iterator gsi;
  530
  531   /* Create new bb between loop and new_merge_bb.  */
  532   *new_exit_bb = split_edge (single_exit (loop));
  533
  534   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
  535
        /* Unlike slpeel_update_phi_nodes_for_guard1, this walks the PHIs
           of UPDATE_BB only.  */
  536   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
  537     {
  538       tree new_res;
  539       update_phi = gsi_stmt (gsi);
            /* ORIG_PHI and UPDATE_PHI denote the same statement here.  */
  540       orig_phi = update_phi;
  541       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
  542       /* This loop-closed-phi actually doesn't represent a use
  543          out of the loop - the phi arg is a constant.  */
  544       if (TREE_CODE (orig_def) != SSA_NAME)
  545         continue;
  546       orig_def_new_name = get_current_def (orig_def);
  547       arg = NULL_TREE;
  548
  549       /** 1. Handle new-merge-point phis  **/
  550
  551       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
  552       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
  553       new_phi = create_phi_node (new_res, new_merge_bb);
  554
  555       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
  556             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
  557       new_name = orig_def;
  558       new_name2 = NULL_TREE;
  559       if (orig_def_new_name)
  560         {
  561           new_name = orig_def_new_name;
  562           /* Some variables have both loop-entry-phis and loop-exit-phis.
  563              Such variables were given yet newer names by phis placed in
  564              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
  565              new_name2 = get_current_def (get_current_def (orig_name)).  */
  566           new_name2 = get_current_def (new_name);
  567         }
  568
            /* For a new loop the guard edge carries the original name and
               the loop exit the renamed one; for the original loop the
               two roles are swapped.  */
  569       if (is_new_loop)
  570         {
  571           guard_arg = orig_def;
  572           loop_arg = new_name;
  573         }
  574       else
  575         {
  576           guard_arg = new_name;
  577           loop_arg = orig_def;
  578         }
            /* A second-level name, when recorded, takes precedence on the
               guard edge.  */
  579       if (new_name2)
  580         guard_arg = new_name2;
  581
  582       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
  583       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
  584
  585       /* 1.3. Update phi in successor block.  */
  586       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
  587       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
            /* Keep the merge PHI at hand: steps 2.3 and 3.4 redirect its
               arguments.  */
  588       update_phi2 = new_phi;
  589
  590
  591       /** 2. Handle loop-closed-ssa-form phis  **/
  592
  593       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
  594       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
  595       new_phi = create_phi_node (new_res, *new_exit_bb);
  596
  597       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
  598       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
  599
  600       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
  601       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
  602       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
  603                                   PHI_RESULT (new_phi));
  604
  605
  606       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
  607
  608       /* 3.1. Find the relevant names that need an exit-phi in
  609          GUARD_BB, i.e. names for which
  610          slpeel_update_phi_nodes_for_guard1 had not already created a
  611          phi node. This is the case for names that are used outside
  612          the loop (and therefore need an exit phi) but are not updated
  613          across loop iterations (and therefore don't have a
  614          loop-header-phi).
  615
  616          slpeel_update_phi_nodes_for_guard1 is responsible for
  617          creating loop-exit phis in GUARD_BB for names that have a
  618          loop-header-phi.  When such a phi is created we also record
  619          the new name in its current definition.  If this new name
  620          exists, then guard_arg was set to this new name (see 1.2
  621          above).  Therefore, if guard_arg is not this new name, this
  622          is an indication that an exit-phi in GUARD_BB was not yet
  623          created, so we take care of it here.  */
            /* When NEW_NAME2 is NULL the test below is false, because
               GUARD_ARG is then ORIG_DEF or NEW_NAME, both non-NULL.  */
  624       if (guard_arg == new_name2)
  625         continue;
  626       arg = guard_arg;
  627
  628       /* 3.2. Generate new phi node in GUARD_BB:  */
  629       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
  630       new_phi = create_phi_node (new_res, guard_edge->src);
  631
  632       /* 3.3. GUARD_BB has one incoming edge:  */
  633       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
  634       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
  635                    UNKNOWN_LOCATION);
  636
  637       /* 3.4. Update phi in successor of GUARD_BB:  */
  638       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
  639                                                                 == guard_arg);
  640       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
  641                                   PHI_RESULT (new_phi));
  642     }
  643 }
 644
 645
 646 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 647    that starts at zero, increases by one and its limit is NITERS.
 648
 649    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 650
  651 void
  652 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
  653 {
  654   tree indx_before_incr, indx_after_incr;
  655   gimple cond_stmt;
  656   gimple orig_cond;
  657   edge exit_edge = single_exit (loop);
  658   gimple_stmt_iterator loop_cond_gsi;
  659   gimple_stmt_iterator incr_gsi;
  660   bool insert_after;
        /* Initial value 0 and step 1 of the new IV, in the type of NITERS.  */
  661   tree init = build_int_cst (TREE_TYPE (niters), 0);
  662   tree step = build_int_cst (TREE_TYPE (niters), 1);
  663   LOC loop_loc;
  664   enum tree_code code;
  665
        /* The existing exit condition must be found; it marks where the
           new test goes and is removed afterwards.  */
  666   orig_cond = get_loop_exit_condition (loop);
  667   gcc_assert (orig_cond);
  668   loop_cond_gsi = gsi_for_stmt (orig_cond);
  669
        /* Create the counting IV at the standard increment position.  */
  670   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  671   create_iv (init, step, NULL_TREE, loop,
  672              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  673
        /* Turn both operands of the new test into gimple operands, with
           any helper stmts placed relative to LOOP_COND_GSI.  */
  674   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
  675                                               true, NULL_TREE, true,
  676                                               GSI_SAME_STMT);
  677   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
  678                                      true, GSI_SAME_STMT);
  679
        /* If the exit edge is the true edge, the test is "IV >= NITERS";
           otherwise it is "IV < NITERS".  */
  680   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  681   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
  682                                  NULL_TREE);
  683
  684   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
  685
  686   /* Remove old loop exit test:  */
  687   gsi_remove (&loop_cond_gsi, true);
  688   free_stmt_vec_info (orig_cond);
  689
        /* Dump the new exit test, prefixed by the loop location when
           one is known.  */
  690   loop_loc = find_loop_location (loop);
  691   if (dump_enabled_p ())
  692     {
  693       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
  694         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
  695                      LOC_LINE (loop_loc));
  696       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
  697       dump_printf (MSG_NOTE, "\n");
  698     }
        /* Record the (gimplified) iteration count on the loop.  */
  699   loop->nb_iterations = niters;
  700 }
 701
 702
 703 /* Given LOOP this function generates a new copy of it and puts it
 704    on E which is either the entry or exit of LOOP.  */
 705
 706 struct loop *
 707 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 708 {
 709   struct loop *new_loop;
 710   basic_block *new_bbs, *bbs;
 711   bool at_exit;
 712   bool was_imm_dom;
 713   basic_block exit_dest;
 714   edge exit, new_exit;
 715
 716   exit = single_exit (loop);
 717   at_exit = (e == exit);
 718   if (!at_exit && e != loop_preheader_edge (loop))
 719     return NULL;
 720
 721   bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 722   get_loop_body_with_size (loop, bbs, loop->num_nodes);
 723
 724   /* Check whether duplication is possible.  */
 725   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 726     {
 727       free (bbs);
 728       return NULL;
 729     }
 730
 731   /* Generate new loop structure.  */
 732   new_loop = duplicate_loop (loop, loop_outer (loop));
 733   duplicate_subloops (loop, new_loop);
 734
 735   exit_dest = exit->dest;
 736   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 737                                           exit_dest) == loop->header ?
 738                  true : false);
 739
 740   /* Also copy the pre-header, this avoids jumping through hoops to
 741      duplicate the loop entry PHI arguments.  Create an empty
 742      pre-header unconditionally for this.  */
 743   basic_block preheader = split_edge (loop_preheader_edge (loop));
 744   edge entry_e = single_pred_edge (preheader);
 745   bbs[loop->num_nodes] = preheader;
 746   new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 747
 748   copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
 749             &exit, 1, &new_exit, NULL,
 750             e->src, true);
 751   basic_block new_preheader = new_bbs[loop->num_nodes];
 752
 753   add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
 754
 755   if (at_exit) /* Add the loop copy at exit.  */
 756     {
 757       redirect_edge_and_branch_force (e, new_preheader);
 758       flush_pending_stmts (e);
 759       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 760       if (was_imm_dom)
 761         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 762
 763       /* And remove the non-necessary forwarder again.  Keep the other
 764          one so we have a proper pre-header for the loop at the exit edge.  */
 765       redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
 766       delete_basic_block (preheader);
 767       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 768                                loop_preheader_edge (loop)->src);
 769     }
 770   else /* Add the copy at entry.  */
 771     {
 772       redirect_edge_and_branch_force (entry_e, new_preheader);
 773       flush_pending_stmts (entry_e);
 774       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 775
 776       redirect_edge_and_branch_force (new_exit, preheader);
 777       flush_pending_stmts (new_exit);
 778       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 779
 780       /* And remove the non-necessary forwarder again.  Keep the other
 781          one so we have a proper pre-header for the loop at the exit edge.  */
 782       redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
 783       delete_basic_block (new_preheader);
 784       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 785                                loop_preheader_edge (new_loop)->src);
 786     }
 787
 788   for (unsigned i = 0; i < loop->num_nodes+1; i++)
 789     rename_variables_in_bb (new_bbs[i]);
 790
 791   free (new_bbs);
 792   free (bbs);
 793
 794 #ifdef ENABLE_CHECKING
 795   verify_dominators (CDI_DOMINATORS);
 796 #endif
 797
 798   return new_loop;
 799 }
 800
 801
 802 /* Given the condition statement COND, put it as the last statement
 803    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 804    Assumes that this is the single exit of the guarded loop.
 805    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 806
 807 static edge
 808 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 809                        gimple_seq cond_expr_stmt_list,
 810                        basic_block exit_bb, basic_block dom_bb,
 811                        int probability)
 812 {
 813   gimple_stmt_iterator gsi;
 814   edge new_e, enter_e;
 815   gimple cond_stmt;
 816   gimple_seq gimplify_stmt_list = NULL;
 817
 818   enter_e = EDGE_SUCC (guard_bb, 0);
 819   enter_e->flags &= ~EDGE_FALLTHRU;
 820   enter_e->flags |= EDGE_FALSE_VALUE;
 821   gsi = gsi_last_bb (guard_bb);
 822
 823   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 824                                  NULL_TREE);
 825   if (gimplify_stmt_list)
 826     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 827   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 828   if (cond_expr_stmt_list)
 829     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 830
 831   gsi = gsi_last_bb (guard_bb);
 832   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 833
 834   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 835   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 836
 837   new_e->count = guard_bb->count;
 838   new_e->probability = probability;
 839   new_e->count = apply_probability (enter_e->count, probability);
 840   enter_e->count -= new_e->count;
 841   enter_e->probability = inverse_probability (probability);
 842   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 843   return new_e;
 844 }
 845
 846
 847 /* This function verifies that the following restrictions apply to LOOP:
 848    (1) it is innermost
 849    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 850    (3) it is single entry, single exit
 851    (4) its exit condition is the last stmt in the header
 852    (5) E is the entry/exit edge of LOOP.
 853  */
 854
 855 bool
 856 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 857 {
 858   edge exit_e = single_exit (loop);
 859   edge entry_e = loop_preheader_edge (loop);
 860   gimple orig_cond = get_loop_exit_condition (loop);
 861   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 862
 863   if (loop->inner
 864       /* All loops have an outer scope; the only case loop->outer is NULL is for
 865          the function itself.  */
 866       || !loop_outer (loop)
 867       || loop->num_nodes != 2
 868       || !empty_block_p (loop->latch)
 869       || !single_exit (loop)
 870       /* Verify that new loop exit condition can be trivially modified.  */
 871       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 872       || (e != exit_e && e != entry_e))
 873     return false;
 874
 875   return true;
 876 }
 877
 878 #ifdef ENABLE_CHECKING
 879 static void
 880 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 881                                  struct loop *second_loop)
 882 {
 883   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 884   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 885   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 886
 887   /* A guard that controls whether the second_loop is to be executed or skipped
 888      is placed in first_loop->exit.  first_loop->exit therefore has two
 889      successors - one is the preheader of second_loop, and the other is a bb
 890      after second_loop.
 891    */
 892   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 893
 894   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 895         of second_loop.  */
 896
 897   /* The preheader of new_loop is expected to have two predecessors:
 898      first_loop->exit and the block that precedes first_loop.  */
 899
 900   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 901               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 902                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 903                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 904                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 905
 906   /* Verify that the other successor of first_loop->exit is after the
 907      second_loop.  */
 908   /* TODO */
 909 }
 910 #endif
 911
 912 /* If the run time cost model check determines that vectorization is
 913    not profitable and hence scalar loop should be generated then set
 914    FIRST_NITERS to prologue peeled iterations. This will allow all the
 915    iterations to be executed in the prologue peeled scalar loop.  */
 916
 917 static void
 918 set_prologue_iterations (basic_block bb_before_first_loop,
 919                          tree *first_niters,
 920                          struct loop *loop,
 921                          unsigned int th,
 922                          int probability)
 923 {
 924   edge e;
 925   basic_block cond_bb, then_bb;
 926   tree var, prologue_after_cost_adjust_name;
 927   gimple_stmt_iterator gsi;
 928   gimple newphi;
 929   edge e_true, e_false, e_fallthru;
 930   gimple cond_stmt;
 931   gimple_seq stmts = NULL;
 932   tree cost_pre_condition = NULL_TREE;
 933   tree scalar_loop_iters =
 934     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 935
 936   e = single_pred_edge (bb_before_first_loop);
 937   cond_bb = split_edge (e);
 938
 939   e = single_pred_edge (bb_before_first_loop);
 940   then_bb = split_edge (e);
 941   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 942
 943   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 944                                    EDGE_FALSE_VALUE);
 945   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 946
 947   e_true = EDGE_PRED (then_bb, 0);
 948   e_true->flags &= ~EDGE_FALLTHRU;
 949   e_true->flags |= EDGE_TRUE_VALUE;
 950
 951   e_true->probability = probability;
 952   e_false->probability = inverse_probability (probability);
 953   e_true->count = apply_probability (cond_bb->count, probability);
 954   e_false->count = cond_bb->count - e_true->count;
 955   then_bb->frequency = EDGE_FREQUENCY (e_true);
 956   then_bb->count = e_true->count;
 957
 958   e_fallthru = EDGE_SUCC (then_bb, 0);
 959   e_fallthru->count = then_bb->count;
 960
 961   gsi = gsi_last_bb (cond_bb);
 962   cost_pre_condition =
 963     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 964                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 965   cost_pre_condition =
 966     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
 967                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
 968   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
 969                                            NULL_TREE, NULL_TREE);
 970   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 971
 972   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 973                         "prologue_after_cost_adjust");
 974   prologue_after_cost_adjust_name =
 975     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 976
 977   gsi = gsi_last_bb (then_bb);
 978   if (stmts)
 979     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 980
 981   newphi = create_phi_node (var, bb_before_first_loop);
 982   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
 983                UNKNOWN_LOCATION);
 984   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
 985
 986   *first_niters = PHI_RESULT (newphi);
 987 }
 988
 989 /* Function slpeel_tree_peel_loop_to_edge.
 990
 991    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 992    that is placed on the entry (exit) edge E of LOOP. After this transformation
 993    we have two loops one after the other - first-loop iterates FIRST_NITERS
 994    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 995    If the cost model indicates that it is profitable to emit a scalar
 996    loop instead of the vector one, then the prolog (epilog) loop will iterate
 997    for the entire unchanged scalar iterations of the loop.
 998
 999    Input:
1000    - LOOP: the loop to be peeled.
1001    - E: the exit or entry edge of LOOP.
1002         If it is the entry edge, we peel the first iterations of LOOP. In this
1003         case first-loop is LOOP, and second-loop is the newly created loop.
1004         If it is the exit edge, we peel the last iterations of LOOP. In this
1005         case, first-loop is the newly created loop, and second-loop is LOOP.
1006    - NITERS: the number of iterations that LOOP iterates.
1007    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1008    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1009         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1010         is false, the caller of this function may want to take care of this
1011         (this can be useful if we don't want new stmts added to first-loop).
1012    - TH: cost model profitability threshold of iterations for vectorization.
1013    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1014                           during versioning and hence needs to occur during
1015                           prologue generation or whether cost model check
1016                           has not occurred during prologue generation and hence
1017                           needs to occur during epilogue generation.
1018    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1019    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1020
1021
1022    Output:
1023    The function returns a pointer to the new loop-copy, or NULL if it failed
1024    to perform the transformation.
1025
1026    The function generates two if-then-else guards: one before the first loop,
1027    and the other before the second loop:
1028    The first guard is:
1029      if (FIRST_NITERS == 0) then skip the first loop,
1030      and go directly to the second loop.
1031    The second guard is:
1032      if (FIRST_NITERS == NITERS) then skip the second loop.
1033
1034    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1035    then the generated condition is combined with COND_EXPR and the
1036    statements in COND_EXPR_STMT_LIST are emitted together with it.
1037
1038    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1039    FORNOW the resulting code will not be in loop-closed-ssa form.
1040 */
1041
1042 static struct loop*
1043 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1044                                edge e, tree *first_niters,
1045                                tree niters, bool update_first_loop_count,
1046                                unsigned int th, bool check_profitability,
1047                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1048                                int bound1, int bound2)
1049 {
1050   struct loop *new_loop = NULL, *first_loop, *second_loop;
1051   edge skip_e;
1052   tree pre_condition = NULL_TREE;
1053   basic_block bb_before_second_loop, bb_after_second_loop;
1054   basic_block bb_before_first_loop;
1055   basic_block bb_between_loops;
1056   basic_block new_exit_bb;
1057   gimple_stmt_iterator gsi;
1058   edge exit_e = single_exit (loop);
1059   LOC loop_loc;
1060   tree cost_pre_condition = NULL_TREE;
1061   /* There are many aspects to how likely the first loop is going to be executed.
1062      Without histogram we can't really do good job.  Simply set it to
1063      2/3, so the first loop is not reordered to the end of function and
1064      the hot path through stays short.  */
1065   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1066   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1067   int probability_of_second_loop;
1068
1069   if (!slpeel_can_duplicate_loop_p (loop, e))
1070     return NULL;
1071
1072   /* We might have a queued need to update virtual SSA form.  As we
1073      delete the update SSA machinery below after doing a regular
1074      incremental SSA update during loop copying make sure we don't
1075      lose that fact.
1076      ???  Needing to update virtual SSA form by renaming is unfortunate
1077      but not all of the vectorizer code inserting new loads / stores
1078      properly assigns virtual operands to those statements.  */
1079   update_ssa (TODO_update_ssa_only_virtuals);
1080
1081   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1082      in the exit bb and rename all the uses after the loop.  This simplifies
1083      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1084      (but normally loop closed SSA form doesn't require virtual PHIs to be
1085      in the same form).  Doing this early simplifies the checking what
1086      uses should be renamed.  */
1087   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1088     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1089       {
1090         gimple phi = gsi_stmt (gsi);
1091         for (gsi = gsi_start_phis (exit_e->dest);
1092              !gsi_end_p (gsi); gsi_next (&gsi))
1093           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1094             break;
1095         if (gsi_end_p (gsi))
1096           {
1097             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1098             gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1099             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1100             imm_use_iterator imm_iter;
1101             gimple stmt;
1102             use_operand_p use_p;
1103
1104             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1105             gimple_phi_set_result (new_phi, new_vop);
1106             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1107               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1108                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1109                   SET_USE (use_p, new_vop);
1110           }
1111         break;
1112       }
1113
1114   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1115         Resulting CFG would be:
1116
1117         first_loop:
1118         do {
1119         } while ...
1120
1121         second_loop:
1122         do {
1123         } while ...
1124
1125         orig_exit_bb:
1126    */
1127
1128   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1129     {
1130       loop_loc = find_loop_location (loop);
1131       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1132                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1133       return NULL;
1134     }
1135
1136   if (MAY_HAVE_DEBUG_STMTS)
1137     {
1138       gcc_assert (!adjust_vec.exists ());
1139       adjust_vec.create (32);
1140     }
1141
1142   if (e == exit_e)
1143     {
1144       /* NEW_LOOP was placed after LOOP.  */
1145       first_loop = loop;
1146       second_loop = new_loop;
1147     }
1148   else
1149     {
1150       /* NEW_LOOP was placed before LOOP.  */
1151       first_loop = new_loop;
1152       second_loop = loop;
1153     }
1154
1155   /* 2.  Add the guard code in one of the following ways:
1156
1157      2.a Add the guard that controls whether the first loop is executed.
1158          This occurs when this function is invoked for prologue or epilogue
1159          generation and when the cost model check can be done at compile time.
1160
1161          Resulting CFG would be:
1162
1163          bb_before_first_loop:
1164          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1165                                 GOTO first-loop
1166
1167          first_loop:
1168          do {
1169          } while ...
1170
1171          bb_before_second_loop:
1172
1173          second_loop:
1174          do {
1175          } while ...
1176
1177          orig_exit_bb:
1178
1179      2.b Add the cost model check that allows the prologue
1180          to iterate for the entire unchanged scalar
1181          iterations of the loop in the event that the cost
1182          model indicates that the scalar loop is more
1183          profitable than the vector one. This occurs when
1184          this function is invoked for prologue generation
1185          and the cost model check needs to be done at run
1186          time.
1187
1188          Resulting CFG after prologue peeling would be:
1189
1190          if (scalar_loop_iterations <= th)
1191            FIRST_NITERS = scalar_loop_iterations
1192
1193          bb_before_first_loop:
1194          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1195                                 GOTO first-loop
1196
1197          first_loop:
1198          do {
1199          } while ...
1200
1201          bb_before_second_loop:
1202
1203          second_loop:
1204          do {
1205          } while ...
1206
1207          orig_exit_bb:
1208
1209      2.c Add the cost model check that allows the epilogue
1210          to iterate for the entire unchanged scalar
1211          iterations of the loop in the event that the cost
1212          model indicates that the scalar loop is more
1213          profitable than the vector one. This occurs when
1214          this function is invoked for epilogue generation
1215          and the cost model check needs to be done at run
1216          time.  This check is combined with any pre-existing
1217          check in COND_EXPR to avoid versioning.
1218
1219          Resulting CFG after prologue peeling would be:
1220
1221          bb_before_first_loop:
1222          if ((scalar_loop_iterations <= th)
1223              ||
1224              FIRST_NITERS == 0) GOTO bb_before_second_loop
1225                                 GOTO first-loop
1226
1227          first_loop:
1228          do {
1229          } while ...
1230
1231          bb_before_second_loop:
1232
1233          second_loop:
1234          do {
1235          } while ...
1236
1237          orig_exit_bb:
1238   */
1239
1240   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1241   /* Loop copying insterted a forwarder block for us here.  */
1242   bb_before_second_loop = single_exit (first_loop)->dest;
1243
1244   probability_of_second_loop = (inverse_probability (first_guard_probability)
1245                                 + combine_probabilities (second_guard_probability,
1246                                                          first_guard_probability));
1247   /* Theoretically preheader edge of first loop and exit edge should have
1248      same frequencies.  Loop exit probablities are however easy to get wrong.
1249      It is safer to copy value from original loop entry.  */
1250   bb_before_second_loop->frequency
1251      = combine_probabilities (bb_before_first_loop->frequency,
1252                               probability_of_second_loop);
1253   bb_before_second_loop->count
1254      = apply_probability (bb_before_first_loop->count,
1255                           probability_of_second_loop);
1256   single_succ_edge (bb_before_second_loop)->count
1257      = bb_before_second_loop->count;
1258
1259   /* Epilogue peeling.  */
1260   if (!update_first_loop_count)
1261     {
1262       pre_condition =
1263         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1264                      build_int_cst (TREE_TYPE (*first_niters), 0));
1265       if (check_profitability)
1266         {
1267           tree scalar_loop_iters
1268             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1269                                         (loop_vec_info_for_loop (loop)));
1270           cost_pre_condition =
1271             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1272                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1273
1274           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1275                                        cost_pre_condition, pre_condition);
1276         }
1277       if (cond_expr)
1278         {
1279           pre_condition =
1280             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1281                          pre_condition,
1282                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1283                                       cond_expr));
1284         }
1285     }
1286
1287   /* Prologue peeling.  */
1288   else
1289     {
1290       if (check_profitability)
1291         set_prologue_iterations (bb_before_first_loop, first_niters,
1292                                  loop, th, first_guard_probability);
1293
1294       pre_condition =
1295         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1296                      build_int_cst (TREE_TYPE (*first_niters), 0));
1297     }
1298
1299   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1300                                   cond_expr_stmt_list,
1301                                   bb_before_second_loop, bb_before_first_loop,
1302                                   inverse_probability (first_guard_probability));
1303   scale_loop_profile (first_loop, first_guard_probability,
1304                       check_profitability && (int)th > bound1 ? th : bound1);
1305   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1306                                       first_loop == new_loop,
1307                                       &new_exit_bb);
1308
1309
1310   /* 3. Add the guard that controls whether the second loop is executed.
1311         Resulting CFG would be:
1312
1313         bb_before_first_loop:
1314         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1315                                GOTO first-loop
1316
1317         first_loop:
1318         do {
1319         } while ...
1320
1321         bb_between_loops:
1322         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1323                                     GOTO bb_before_second_loop
1324
1325         bb_before_second_loop:
1326
1327         second_loop:
1328         do {
1329         } while ...
1330
1331         bb_after_second_loop:
1332
1333         orig_exit_bb:
1334    */
1335
1336   bb_between_loops = new_exit_bb;
1337   bb_after_second_loop = split_edge (single_exit (second_loop));
1338
1339   pre_condition =
1340         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1341   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1342                                   bb_after_second_loop, bb_before_first_loop,
1343                                   inverse_probability (second_guard_probability));
1344   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1345   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1346                                      second_loop == new_loop, &new_exit_bb);
1347
1348   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1349    */
1350   if (update_first_loop_count)
1351     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1352
1353   delete_update_ssa ();
1354
1355   adjust_vec_debug_stmts ();
1356
1357   return new_loop;
1358 }
1359
1360 /* Function vect_get_loop_location.
1361
1362    Extract the location of the loop in the source code.
1363    If the loop is not well formed for vectorization, an estimated
1364    location is calculated.
1365    Return the loop location if succeed and NULL if not.  */
1366
1367 LOC
1368 find_loop_location (struct loop *loop)
1369 {
1370   gimple stmt = NULL;
1371   basic_block bb;
1372   gimple_stmt_iterator si;
1373
1374   if (!loop)
1375     return UNKNOWN_LOC;
1376
1377   stmt = get_loop_exit_condition (loop);
1378
1379   if (stmt
1380       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1381     return gimple_location (stmt);
1382
1383   /* If we got here the loop is probably not "well formed",
1384      try to estimate the loop location */
1385
1386   if (!loop->header)
1387     return UNKNOWN_LOC;
1388
1389   bb = loop->header;
1390
1391   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1392     {
1393       stmt = gsi_stmt (si);
1394       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1395         return gimple_location (stmt);
1396     }
1397
1398   return UNKNOWN_LOC;
1399 }
1400
1401
1402 /* This function builds ni_name = number of iterations loop executes
1403    on the loop preheader.  If SEQ is given the stmt is instead emitted
1404    there.  */
1405
1406 static tree
1407 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1408 {
1409   tree ni_name, var;
1410   gimple_seq stmts = NULL;
1411   edge pe;
1412   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1413   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1414
1415   var = create_tmp_var (TREE_TYPE (ni), "niters");
1416   ni_name = force_gimple_operand (ni, &stmts, false, var);
1417
1418   pe = loop_preheader_edge (loop);
1419   if (stmts)
1420     {
1421       if (seq)
1422         gimple_seq_add_seq (&seq, stmts);
1423       else
1424         {
1425           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1426           gcc_assert (!new_bb);
1427         }
1428     }
1429
1430   return ni_name;
1431 }
1432
1433
1434 /* This function generates the following statements:
1435
1436  ni_name = number of iterations loop executes
1437  ratio = ni_name / vf
1438  ratio_mult_vf_name = ratio * vf
1439
1440  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1441  if that is non-NULL.  */
1442
1443 void
1444 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1445                                  tree *ni_name_ptr,
1446                                  tree *ratio_mult_vf_name_ptr,
1447                                  tree *ratio_name_ptr,
1448                                  gimple_seq cond_expr_stmt_list)
1449 {
1450
1451   edge pe;
1452   basic_block new_bb;
1453   gimple_seq stmts;
1454   tree ni_name, ni_minus_gap_name;
1455   tree var;
1456   tree ratio_name;
1457   tree ratio_mult_vf_name;
1458   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1459   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1460   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1461   tree log_vf;
1462
1463   pe = loop_preheader_edge (loop);
1464
1465   /* Generate temporary variable that contains
1466      number of iterations loop executes.  */
1467
1468   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1469   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1470
1471   /* If epilogue loop is required because of data accesses with gaps, we
1472      subtract one iteration from the total number of iterations here for
1473      correct calculation of RATIO.  */
1474   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1475     {
1476       ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1477                                        ni_name,
1478                                        build_one_cst (TREE_TYPE (ni_name)));
1479       if (!is_gimple_val (ni_minus_gap_name))
1480         {
1481           var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1482
1483           stmts = NULL;
1484           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1485                                                     true, var);
1486           if (cond_expr_stmt_list)
1487             gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1488           else
1489             {
1490               pe = loop_preheader_edge (loop);
1491               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1492               gcc_assert (!new_bb);
1493             }
1494         }
1495     }
1496   else
1497     ni_minus_gap_name = ni_name;
1498
1499   /* Create: ratio = ni >> log2(vf) */
1500
1501   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1502                             ni_minus_gap_name, log_vf);
1503   if (!is_gimple_val (ratio_name))
1504     {
1505       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1506
1507       stmts = NULL;
1508       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1509       if (cond_expr_stmt_list)
1510         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1511       else
1512         {
1513           pe = loop_preheader_edge (loop);
1514           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1515           gcc_assert (!new_bb);
1516         }
1517     }
1518
1519   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1520
1521   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1522                                     ratio_name, log_vf);
1523   if (!is_gimple_val (ratio_mult_vf_name))
1524     {
1525       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1526
1527       stmts = NULL;
1528       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1529                                                  true, var);
1530       if (cond_expr_stmt_list)
1531         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1532       else
1533         {
1534           pe = loop_preheader_edge (loop);
1535           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1536           gcc_assert (!new_bb);
1537         }
1538     }
1539
1540   *ni_name_ptr = ni_name;
1541   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1542   *ratio_name_ptr = ratio_name;
1543
1544   return;
1545 }
1546
1547 /* Function vect_can_advance_ivs_p
1548
1549    In case the number of iterations that LOOP iterates is unknown at compile
1550    time, an epilog loop will be generated, and the loop induction variables
1551    (IVs) will be "advanced" to the value they are supposed to take just before
1552    the epilog loop.  Here we check that the access function of the loop IVs
1553    and the expression that represents the loop bound are simple enough.
1554    These restrictions will be relaxed in the future.  */
1555
1556 bool
1557 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1558 {
1559   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1560   basic_block bb = loop->header;
1561   gimple phi;
1562   gimple_stmt_iterator gsi;
1563
1564   /* Analyze phi functions of the loop header.  */
1565
1566   if (dump_enabled_p ())
1567     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1568   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1569     {
1570       tree evolution_part;
1571
1572       phi = gsi_stmt (gsi);
1573       if (dump_enabled_p ())
1574         {
1575           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1576           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1577           dump_printf (MSG_NOTE, "\n");
1578         }
1579
1580       /* Skip virtual phi's. The data dependences that are associated with
1581          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1582
1583       if (virtual_operand_p (PHI_RESULT (phi)))
1584         {
1585           if (dump_enabled_p ())
1586             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1587                              "virtual phi. skip.\n");
1588           continue;
1589         }
1590
1591       /* Skip reduction phis.  */
1592
1593       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1594         {
1595           if (dump_enabled_p ())
1596             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1597                              "reduc phi. skip.\n");
1598           continue;
1599         }
1600
1601       /* Analyze the evolution function.  */
1602
1603       evolution_part
1604         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1605       if (evolution_part == NULL_TREE)
1606         {
1607           if (dump_enabled_p ())
1608             dump_printf (MSG_MISSED_OPTIMIZATION,
1609                          "No access function or evolution.\n");
1610           return false;
1611         }
1612
1613       /* FORNOW: We do not transform initial conditions of IVs
1614          which evolution functions are a polynomial of degree >= 2.  */
1615
1616       if (tree_is_chrec (evolution_part))
1617         return false;
1618     }
1619
1620   return true;
1621 }
1622
1623
1624 /*   Function vect_update_ivs_after_vectorizer.
1625
1626      "Advance" the induction variables of LOOP to the value they should take
1627      after the execution of LOOP.  This is currently necessary because the
1628      vectorizer does not handle induction variables that are used after the
1629      loop.  Such a situation occurs when the last iterations of LOOP are
1630      peeled, because:
1631      1. We introduced new uses after LOOP for IVs that were not originally used
1632         after LOOP: the IVs of LOOP are now used by an epilog loop.
1633      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1634         times, whereas the loop IVs should be bumped N times.
1635
1636      Input:
1637      - LOOP - a loop that is going to be vectorized. The last few iterations
1638               of LOOP were peeled.
1639      - NITERS - the number of iterations that LOOP executes (before it is
1640                 vectorized). i.e, the number of times the ivs should be bumped.
1641      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1642                   coming out from LOOP on which there are uses of the LOOP ivs
1643                   (this is the path from LOOP->exit to epilog_loop->preheader).
1644
1645                   The new definitions of the ivs are placed in LOOP->exit.
1646                   The phi args associated with the edge UPDATE_E in the bb
1647                   UPDATE_E->dest are updated accordingly.
1648
1649      Assumption 1: Like the rest of the vectorizer, this function assumes
1650      a single loop exit that has a single predecessor.
1651
1652      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1653      organized in the same order.
1654
1655      Assumption 3: The access function of the ivs is simple enough (see
1656      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1657
1658      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1659      coming out of LOOP on which the ivs of LOOP are used (this is the path
1660      that leads to the epilog loop; other paths skip the epilog loop).  This
1661      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1662      needs to have its phis updated.
1663  */
1664
1665 static void
1666 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1667                                   edge update_e)
1668 {
1669   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1670   basic_block exit_bb = single_exit (loop)->dest;
1671   gimple phi, phi1;
1672   gimple_stmt_iterator gsi, gsi1;
1673   basic_block update_bb = update_e->dest;
1674
1675   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1676
1677   /* Make sure there exists a single-predecessor exit bb:  */
1678   gcc_assert (single_pred_p (exit_bb));
1679
1680   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1681        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1682        gsi_next (&gsi), gsi_next (&gsi1))
1683     {
1684       tree init_expr;
1685       tree step_expr, off;
1686       tree type;
1687       tree var, ni, ni_name;
1688       gimple_stmt_iterator last_gsi;
1689       stmt_vec_info stmt_info;
1690
1691       phi = gsi_stmt (gsi);
1692       phi1 = gsi_stmt (gsi1);
1693       if (dump_enabled_p ())
1694         {
1695           dump_printf_loc (MSG_NOTE, vect_location,
1696                            "vect_update_ivs_after_vectorizer: phi: ");
1697           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1698           dump_printf (MSG_NOTE, "\n");
1699         }
1700
1701       /* Skip virtual phi's.  */
1702       if (virtual_operand_p (PHI_RESULT (phi)))
1703         {
1704           if (dump_enabled_p ())
1705             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1706                              "virtual phi. skip.\n");
1707           continue;
1708         }
1709
1710       /* Skip reduction phis.  */
1711       stmt_info = vinfo_for_stmt (phi);
1712       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1713         {
1714           if (dump_enabled_p ())
1715             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1716                              "reduc phi. skip.\n");
1717           continue;
1718         }
1719
1720       type = TREE_TYPE (gimple_phi_result (phi));
1721       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1722       step_expr = unshare_expr (step_expr);
1723
1724       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1725          of degree >= 2 or exponential.  */
1726       gcc_assert (!tree_is_chrec (step_expr));
1727
1728       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1729
1730       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1731                          fold_convert (TREE_TYPE (step_expr), niters),
1732                          step_expr);
1733       if (POINTER_TYPE_P (type))
1734         ni = fold_build_pointer_plus (init_expr, off);
1735       else
1736         ni = fold_build2 (PLUS_EXPR, type,
1737                           init_expr, fold_convert (type, off));
1738
1739       var = create_tmp_var (type, "tmp");
1740
1741       last_gsi = gsi_last_bb (exit_bb);
1742       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1743                                           true, GSI_SAME_STMT);
1744
1745       /* Fix phi expressions in the successor bb.  */
1746       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1747     }
1748 }
1749
1750 /* Function vect_do_peeling_for_loop_bound
1751
1752    Peel the last iterations of the loop represented by LOOP_VINFO.
1753    The peeled iterations form a new epilog loop.  Given that the loop now
1754    iterates NITERS times, the new epilog loop iterates
1755    NITERS % VECTORIZATION_FACTOR times.
1756
1757    The original loop will later be made to iterate
1758    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1759
1760    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1761    test.  */
1762
1763 void
1764 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1765                                 unsigned int th, bool check_profitability)
1766 {
1767   tree ni_name, ratio_mult_vf_name;
1768   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1769   struct loop *new_loop;
1770   edge update_e;
1771   basic_block preheader;
1772   int loop_num;
1773   int max_iter;
1774   tree cond_expr = NULL_TREE;
1775   gimple_seq cond_expr_stmt_list = NULL;
1776
1777   if (dump_enabled_p ())
1778     dump_printf_loc (MSG_NOTE, vect_location,
1779                      "=== vect_do_peeling_for_loop_bound ===\n");
1780
1781   initialize_original_copy_tables ();
1782
1783   /* Generate the following variables on the preheader of original loop:
1784
1785      ni_name = number of iteration the original loop executes
1786      ratio = ni_name / vf
1787      ratio_mult_vf_name = ratio * vf  */
1788   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1789                                    &ratio_mult_vf_name, ratio,
1790                                    cond_expr_stmt_list);
1791
1792   loop_num  = loop->num;
1793
1794   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1795                                             &ratio_mult_vf_name, ni_name, false,
1796                                             th, check_profitability,
1797                                             cond_expr, cond_expr_stmt_list,
1798                                             0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1799   gcc_assert (new_loop);
1800   gcc_assert (loop_num == loop->num);
1801 #ifdef ENABLE_CHECKING
1802   slpeel_verify_cfg_after_peeling (loop, new_loop);
1803 #endif
1804
1805   /* A guard that controls whether the new_loop is to be executed or skipped
1806      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1807      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1808      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1809      is on the path where the LOOP IVs are used and need to be updated.  */
1810
1811   preheader = loop_preheader_edge (new_loop)->src;
1812   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1813     update_e = EDGE_PRED (preheader, 0);
1814   else
1815     update_e = EDGE_PRED (preheader, 1);
1816
1817   /* Update IVs of original loop as if they were advanced
1818      by ratio_mult_vf_name steps.  */
1819   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1820
1821   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1822      and this means N-2 loopback edge executions.
1823
1824      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1825      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1826   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1827               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1828               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1829   if (check_profitability)
1830     max_iter = MAX (max_iter, (int) th - 1);
1831   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1832   dump_printf (MSG_NOTE,
1833                "Setting upper bound of nb iterations for epilogue "
1834                "loop to %d\n", max_iter);
1835
1836   /* After peeling we have to reset scalar evolution analyzer.  */
1837   scev_reset ();
1838
1839   free_original_copy_tables ();
1840 }
1841
1842
1843 /* Function vect_gen_niters_for_prolog_loop
1844
1845    Set the number of iterations for the loop represented by LOOP_VINFO
1846    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1847    and the misalignment of DR - the data reference recorded in
1848    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1849    this loop, the data reference DR will refer to an aligned location.
1850
1851    The following computation is generated:
1852
1853    If the misalignment of DR is known at compile time:
1854      addr_mis = int mis = DR_MISALIGNMENT (dr);
1855    Else, compute address misalignment in bytes:
1856      addr_mis = addr & (vectype_align - 1)
1857
1858    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1859
1860    (elem_size = element type size; an element is the scalar element whose type
1861    is the inner type of the vectype)
1862
1863    When the step of the data-ref in the loop is not 1 (as in interleaved data
1864    and SLP), the number of iterations of the prolog must be divided by the step
1865    (which is equal to the size of interleaved group).
1866
1867    The above formulas assume that VF == number of elements in the vector. This
1868    may not hold when there are multiple-types in the loop.
1869    In this case, for some data-references in the loop the VF does not represent
1870    the number of elements that fit in the vector.  Therefore, instead of VF we
1871    use TYPE_VECTOR_SUBPARTS.  */
1872
1873 static tree
1874 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1875 {
1876   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1877   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1878   tree var;
1879   gimple_seq stmts;
1880   tree iters, iters_name;
1881   edge pe;
1882   basic_block new_bb;
1883   gimple dr_stmt = DR_STMT (dr);
1884   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1885   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1886   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1887   tree niters_type = TREE_TYPE (loop_niters);
1888   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1889
1890   pe = loop_preheader_edge (loop);
1891
1892   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1893     {
1894       int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1895
1896       if (dump_enabled_p ())
1897         dump_printf_loc (MSG_NOTE, vect_location,
1898                          "known peeling = %d.\n", npeel);
1899
1900       iters = build_int_cst (niters_type, npeel);
1901       *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1902     }
1903   else
1904     {
1905       gimple_seq new_stmts = NULL;
1906       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1907       tree offset = negative
1908           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1909       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1910                                                 &new_stmts, offset, loop);
1911       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1912       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1913       HOST_WIDE_INT elem_size =
1914                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1915       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1916       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1917       tree nelements_tree = build_int_cst (type, nelements);
1918       tree byte_misalign;
1919       tree elem_misalign;
1920
1921       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1922       gcc_assert (!new_bb);
1923
1924       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1925       byte_misalign =
1926         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1927                      vectype_align_minus_1);
1928
1929       /* Create:  elem_misalign = byte_misalign / element_size  */
1930       elem_misalign =
1931         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1932
1933       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1934       if (negative)
1935         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1936       else
1937         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1938       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1939       iters = fold_convert (niters_type, iters);
1940       *bound = nelements;
1941     }
1942
1943   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1944   /* If the loop bound is known at compile time we already verified that it is
1945      greater than vf; since the misalignment ('iters') is at most vf, there's
1946      no need to generate the MIN_EXPR in this case.  */
1947   if (TREE_CODE (loop_niters) != INTEGER_CST)
1948     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1949
1950   if (dump_enabled_p ())
1951     {
1952       dump_printf_loc (MSG_NOTE, vect_location,
1953                        "niters for prolog loop: ");
1954       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1955       dump_printf (MSG_NOTE, "\n");
1956     }
1957
1958   var = create_tmp_var (niters_type, "prolog_loop_niters");
1959   stmts = NULL;
1960   iters_name = force_gimple_operand (iters, &stmts, false, var);
1961
1962   /* Insert stmt on loop preheader edge.  */
1963   if (stmts)
1964     {
1965       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1966       gcc_assert (!new_bb);
1967     }
1968
1969   return iters_name;
1970 }
1971
1972
1973 /* Function vect_update_init_of_dr
1974
1975    NITERS iterations were peeled from LOOP.  DR represents a data reference
1976    in LOOP.  This function updates the information recorded in DR to
1977    account for the fact that the first NITERS iterations had already been
1978    executed.  Specifically, it updates the OFFSET field of DR.  */
1979
1980 static void
1981 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1982 {
1983   tree offset = DR_OFFSET (dr);
1984
1985   niters = fold_build2 (MULT_EXPR, sizetype,
1986                         fold_convert (sizetype, niters),
1987                         fold_convert (sizetype, DR_STEP (dr)));
1988   offset = fold_build2 (PLUS_EXPR, sizetype,
1989                         fold_convert (sizetype, offset), niters);
1990   DR_OFFSET (dr) = offset;
1991 }
1992
1993
1994 /* Function vect_update_inits_of_drs
1995
1996    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1997    This function updates the information recorded for the data references in
1998    the loop to account for the fact that the first NITERS iterations had
1999    already been executed.  Specifically, it updates the initial_condition of
2000    the access_function of all the data_references in the loop.  */
2001
2002 static void
2003 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2004 {
2005   unsigned int i;
2006   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2007   struct data_reference *dr;
2008
2009  if (dump_enabled_p ())
2010     dump_printf_loc (MSG_NOTE, vect_location,
2011                      "=== vect_update_inits_of_dr ===\n");
2012
2013   FOR_EACH_VEC_ELT (datarefs, i, dr)
2014     vect_update_init_of_dr (dr, niters);
2015 }
2016
2017
2018 /* Function vect_do_peeling_for_alignment
2019
2020    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2021    'niters' is set to the misalignment of one of the data references in the
2022    loop, thereby forcing it to refer to an aligned location at the beginning
2023    of the execution of this loop.  The data reference for which we are
2024    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2025
2026 void
2027 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
2028                                unsigned int th, bool check_profitability)
2029 {
2030   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2031   tree niters_of_prolog_loop, ni_name;
2032   tree n_iters;
2033   tree wide_prolog_niters;
2034   struct loop *new_loop;
2035   int max_iter;
2036   int bound = 0;
2037
2038   if (dump_enabled_p ())
2039     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2040                      "loop peeled for vectorization to enhance"
2041                      " alignment\n");
2042
2043   initialize_original_copy_tables ();
2044
2045   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2046   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2047                                                            ni_name,
2048                                                            &bound);
2049
2050   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2051   new_loop =
2052     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2053                                    &niters_of_prolog_loop, ni_name, true,
2054                                    th, check_profitability, NULL_TREE, NULL,
2055                                    bound,
2056                                    0);
2057
2058   gcc_assert (new_loop);
2059 #ifdef ENABLE_CHECKING
2060   slpeel_verify_cfg_after_peeling (new_loop, loop);
2061 #endif
2062   /* For vectorization factor N, we need to copy at most N-1 values
2063      for alignment and this means N-2 loopback edge executions.  */
2064   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2065   if (check_profitability)
2066     max_iter = MAX (max_iter, (int) th - 1);
2067   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
2068   dump_printf (MSG_NOTE,
2069                "Setting upper bound of nb iterations for prologue "
2070                "loop to %d\n", max_iter);
2071
2072   /* Update number of times loop executes.  */
2073   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2074   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2075                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2076
2077   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2078     wide_prolog_niters = niters_of_prolog_loop;
2079   else
2080     {
2081       gimple_seq seq = NULL;
2082       edge pe = loop_preheader_edge (loop);
2083       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2084       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2085       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2086                                                  var);
2087       if (seq)
2088         {
2089           /* Insert stmt on loop preheader edge.  */
2090           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2091           gcc_assert (!new_bb);
2092         }
2093     }
2094
2095   /* Update the init conditions of the access functions of all data refs.  */
2096   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2097
2098   /* After peeling we have to reset scalar evolution analyzer.  */
2099   scev_reset ();
2100
2101   free_original_copy_tables ();
2102 }
2103
2104
2105 /* Function vect_create_cond_for_align_checks.
2106
2107    Create a conditional expression that represents the alignment checks for
2108    all of data references (array element references) whose alignment must be
2109    checked at runtime.
2110
2111    Input:
2112    COND_EXPR  - input conditional expression.  New conditions will be chained
2113                 with logical AND operation.
2114    LOOP_VINFO - two fields of the loop information are used.
2115                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2116                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2117
2118    Output:
2119    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2120                          expression.
2121    The returned value is the conditional expression to be used in the if
2122    statement that controls which version of the loop gets executed at runtime.
2123
2124    The algorithm makes two assumptions:
2125      1) The number of bytes "n" in a vector is a power of 2.
2126      2) An address "a" is aligned if a%n is zero and that this
2127         test can be done as a&(n-1) == 0.  For example, for 16
2128         byte vectors the test is a&0xf == 0.  */
2129
2130 static void
2131 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2132                                    tree *cond_expr,
2133                                    gimple_seq *cond_expr_stmt_list)
2134 {
2135   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2136   vec<gimple> may_misalign_stmts
2137     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2138   gimple ref_stmt;
2139   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2140   tree mask_cst;
2141   unsigned int i;
2142   tree int_ptrsize_type;
2143   char tmp_name[20];
2144   tree or_tmp_name = NULL_TREE;
2145   tree and_tmp_name;
2146   gimple and_stmt;
2147   tree ptrsize_zero;
2148   tree part_cond_expr;
2149
2150   /* Check that mask is one less than a power of 2, i.e., mask is
2151      all zeros followed by all ones.  */
2152   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2153
2154   int_ptrsize_type = signed_type_for (ptr_type_node);
2155
2156   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2157      of the first vector of the i'th data reference. */
2158
2159   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2160     {
2161       gimple_seq new_stmt_list = NULL;
2162       tree addr_base;
2163       tree addr_tmp_name;
2164       tree new_or_tmp_name;
2165       gimple addr_stmt, or_stmt;
2166       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2167       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2168       bool negative = tree_int_cst_compare
2169         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2170       tree offset = negative
2171         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2172
2173       /* create: addr_tmp = (int)(address_of_first_vector) */
2174       addr_base =
2175         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2176                                               offset, loop);
2177       if (new_stmt_list != NULL)
2178         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2179
2180       sprintf (tmp_name, "addr2int%d", i);
2181       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2182       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2183                                                 addr_base, NULL_TREE);
2184       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2185
2186       /* The addresses are OR together.  */
2187
2188       if (or_tmp_name != NULL_TREE)
2189         {
2190           /* create: or_tmp = or_tmp | addr_tmp */
2191           sprintf (tmp_name, "orptrs%d", i);
2192           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2193           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2194                                                   new_or_tmp_name,
2195                                                   or_tmp_name, addr_tmp_name);
2196           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2197           or_tmp_name = new_or_tmp_name;
2198         }
2199       else
2200         or_tmp_name = addr_tmp_name;
2201
2202     } /* end for i */
2203
2204   mask_cst = build_int_cst (int_ptrsize_type, mask);
2205
2206   /* create: and_tmp = or_tmp & mask  */
2207   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2208
2209   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2210                                            or_tmp_name, mask_cst);
2211   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2212
2213   /* Make and_tmp the left operand of the conditional test against zero.
2214      if and_tmp has a nonzero bit then some address is unaligned.  */
2215   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2216   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2217                                 and_tmp_name, ptrsize_zero);
2218   if (*cond_expr)
2219     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2220                               *cond_expr, part_cond_expr);
2221   else
2222     *cond_expr = part_cond_expr;
2223 }
2224
2225 /* Function vect_create_cond_for_alias_checks.
2226
2227    Create a conditional expression that represents the run-time checks for
2228    overlapping of address ranges represented by a list of data references
2229    relations passed as input.
2230
2231    Input:
2232    COND_EXPR  - input conditional expression.  New conditions will be chained
2233                 with logical AND operation.  If it is NULL, then the function
2234                 is used to return the number of alias checks.
2235    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2236                 to be checked.
2237
2238    Output:
2239    COND_EXPR - conditional expression.
2240
2241    The returned COND_EXPR is the conditional expression to be used in the if
2242    statement that controls which version of the loop gets executed at runtime.
2243 */
2244
2245 void
2246 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2247 {
2248   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2249     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2250   tree part_cond_expr;
2251
2252   /* Create expression
2253      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2254      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2255      &&
2256      ...
2257      &&
2258      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2259      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2260
2261   if (comp_alias_ddrs.is_empty ())
2262     return;
2263
2264   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2265     {
2266       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2267       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2268       tree segment_length_a = dr_a.seg_len;
2269       tree segment_length_b = dr_b.seg_len;
2270
2271       tree addr_base_a
2272         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
2273       tree addr_base_b
2274         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
2275
2276       if (dump_enabled_p ())
2277         {
2278           dump_printf_loc (MSG_NOTE, vect_location,
2279                            "create runtime check for data references ");
2280           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2281           dump_printf (MSG_NOTE, " and ");
2282           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2283           dump_printf (MSG_NOTE, "\n");
2284         }
2285
2286       tree seg_a_min = addr_base_a;
2287       tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2288       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2289         seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2290
2291       tree seg_b_min = addr_base_b;
2292       tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2293       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2294         seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2295
2296       part_cond_expr =
2297         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2298           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2299           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2300
2301       if (*cond_expr)
2302         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2303                                   *cond_expr, part_cond_expr);
2304       else
2305         *cond_expr = part_cond_expr;
2306     }
2307
2308   if (dump_enabled_p ())
2309     dump_printf_loc (MSG_NOTE, vect_location,
2310                      "created %u versioning for alias checks.\n",
2311                      comp_alias_ddrs.length ());
2312
2313   comp_alias_ddrs.release ();
2314 }
2315
2316
2317 /* Function vect_loop_versioning.
2318
2319    If the loop has data references that may or may not be aligned or/and
2320    has data reference relations whose independence was not proven then
2321    two versions of the loop need to be generated, one which is vectorized
2322    and one which isn't.  A test is then generated to control which of the
2323    loops is executed.  The test checks for the alignment of all of the
2324    data references that may or may not be aligned.  An additional
2325    sequence of runtime tests is generated for each pairs of DDRs whose
2326    independence was not proven.  The vectorized version of loop is
2327    executed only if both alias and alignment tests are passed.
2328
2329    The test generated to check which version of loop is executed
2330    is modified to also check for profitability as indicated by the
2331    cost model initially.
2332
2333    The versioning precondition(s) are placed in *COND_EXPR and
2334    *COND_EXPR_STMT_LIST.  */
2335
2336 void
2337 vect_loop_versioning (loop_vec_info loop_vinfo,
2338                       unsigned int th, bool check_profitability)
2339 {
2340   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2341   basic_block condition_bb;
2342   gimple_stmt_iterator gsi, cond_exp_gsi;
2343   basic_block merge_bb;
2344   basic_block new_exit_bb;
2345   edge new_exit_e, e;
2346   gimple orig_phi, new_phi;
2347   tree cond_expr = NULL_TREE;
2348   gimple_seq cond_expr_stmt_list = NULL;
2349   tree arg;
2350   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2351   gimple_seq gimplify_stmt_list = NULL;
2352   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2353   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2354   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2355
2356   if (check_profitability)
2357     {
2358       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2359                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2360       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2361                                           is_gimple_condexpr, NULL_TREE);
2362     }
2363
2364   if (version_align)
2365     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2366                                        &cond_expr_stmt_list);
2367
2368   if (version_alias)
2369     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2370
2371   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2372                                       is_gimple_condexpr, NULL_TREE);
2373   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2374
2375   initialize_original_copy_tables ();
2376   loop_version (loop, cond_expr, &condition_bb,
2377                 prob, prob, REG_BR_PROB_BASE - prob, true);
2378
2379   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
2380       && dump_enabled_p ())
2381     {
2382       if (version_alias)
2383         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2384                          "loop versioned for vectorization because of "
2385                          "possible aliasing\n");
2386       if (version_align)
2387         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2388                          "loop versioned for vectorization to enhance "
2389                          "alignment\n");
2390
2391     }
2392   free_original_copy_tables ();
2393
2394   /* Loop versioning violates an assumption we try to maintain during
2395      vectorization - that the loop exit block has a single predecessor.
2396      After versioning, the exit block of both loop versions is the same
2397      basic block (i.e. it has two predecessors). Just in order to simplify
2398      following transformations in the vectorizer, we fix this situation
2399      here by adding a new (empty) block on the exit-edge of the loop,
2400      with the proper loop-exit phis to maintain loop-closed-form.  */
2401
2402   merge_bb = single_exit (loop)->dest;
2403   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2404   new_exit_bb = split_edge (single_exit (loop));
2405   new_exit_e = single_exit (loop);
2406   e = EDGE_SUCC (new_exit_bb, 0);
2407
2408   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2409     {
2410       tree new_res;
2411       orig_phi = gsi_stmt (gsi);
2412       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2413       new_phi = create_phi_node (new_res, new_exit_bb);
2414       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2415       add_phi_arg (new_phi, arg, new_exit_e,
2416                    gimple_phi_arg_location_from_edge (orig_phi, e));
2417       adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2418     }
2419
2420
2421   /* Extract load statements on memrefs with zero-stride accesses.  */
2422
2423   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2424     {
2425       /* In the loop body, we iterate each statement to check if it is a load.
2426          Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
2427          then we will hoist the load statement to the loop preheader.  */
2428
2429       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2430       int nbbs = loop->num_nodes;
2431
2432       for (int i = 0; i < nbbs; ++i)
2433         {
2434           for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
2435                !gsi_end_p (si);)
2436             {
2437               gimple stmt = gsi_stmt (si);
2438               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2439               struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2440
2441               if (is_gimple_assign (stmt)
2442                   && (!dr
2443                       || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
2444                 {
2445                   bool hoist = true;
2446                   ssa_op_iter iter;
2447                   tree var;
2448
2449                   /* We hoist a statement if all SSA uses in it are defined
2450                      outside of the loop.  */
2451                   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
2452                     {
2453                       gimple def = SSA_NAME_DEF_STMT (var);
2454                       if (!gimple_nop_p (def)
2455                           && flow_bb_inside_loop_p (loop, gimple_bb (def)))
2456                         {
2457                           hoist = false;
2458                           break;
2459                         }
2460                     }
2461
2462                   if (hoist)
2463                     {
2464                       if (dr)
2465                         gimple_set_vuse (stmt, NULL);
2466
2467                       gsi_remove (&si, false);
2468                       gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
2469                                                     stmt);
2470
2471                       if (dump_enabled_p ())
2472                         {
2473                           dump_printf_loc
2474                               (MSG_NOTE, vect_location,
2475                                "hoisting out of the vectorized loop: ");
2476                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2477                           dump_printf (MSG_NOTE, "\n");
2478                         }
2479                       continue;
2480                     }
2481                 }
2482               gsi_next (&si);
2483             }
2484         }
2485     }
2486
2487   /* End loop-exit-fixes after versioning.  */
2488
2489   if (cond_expr_stmt_list)
2490     {
2491       cond_exp_gsi = gsi_last_bb (condition_bb);
2492       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2493                              GSI_SAME_STMT);
2494     }
2495   update_ssa (TODO_update_ssa);
2496 }