gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "tree-ssa.h"
  32 #include "tree-pass.h"
  33 #include "cfgloop.h"
  34 #include "diagnostic-core.h"
  35 #include "tree-scalar-evolution.h"
  36 #include "tree-vectorizer.h"
  37 #include "langhooks.h"
  38
  39 /*************************************************************************
  40   Simple Loop Peeling Utilities
  41
  42   Utilities to support loop peeling for vectorization purposes.
  43  *************************************************************************/
  44
  45
  46 /* Renames the use *OP_P.  */
  47
  48 static void
  49 rename_use_op (use_operand_p op_p)
  50 {
  51   tree new_name;
  52
  53   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  54     return;
  55
  56   new_name = get_current_def (USE_FROM_PTR (op_p));
  57
  58   /* Something defined outside of the loop.  */
  59   if (!new_name)
  60     return;
  61
  62   /* An ordinary ssa name defined in the loop.  */
  63
  64   SET_USE (op_p, new_name);
  65 }
  66
  67
  68 /* Renames the variables in basic block BB.  */
  69
  70 static void
  71 rename_variables_in_bb (basic_block bb)
  72 {
  73   gimple_stmt_iterator gsi;
  74   gimple stmt;
  75   use_operand_p use_p;
  76   ssa_op_iter iter;
  77   edge e;
  78   edge_iterator ei;
  79   struct loop *loop = bb->loop_father;
  80
  81   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  82     {
  83       stmt = gsi_stmt (gsi);
  84       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
  85         rename_use_op (use_p);
  86     }
  87
  88   FOR_EACH_EDGE (e, ei, bb->preds)
  89     {
  90       if (!flow_bb_inside_loop_p (loop, e->src))
  91         continue;
  92       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
  93         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
  94     }
  95 }
  96
  97
  98 typedef struct
  99 {
 100   tree from, to;
 101   basic_block bb;
 102 } adjust_info;
 103
 104 /* A stack of values to be adjusted in debug stmts.  We have to
 105    process them LIFO, so that the closest substitution applies.  If we
 106    processed them FIFO, without the stack, we might substitute uses
 107    with a PHI DEF that would soon become non-dominant, and when we got
 108    to the suitable one, it wouldn't have anything to substitute any
 109    more.  */
 110 static vec<adjust_info, va_stack> adjust_vec;
 111
 112 /* Adjust any debug stmts that referenced AI->from values to use the
 113    loop-closed AI->to, if the references are dominated by AI->bb and
 114    not by the definition of AI->from.  */
 115
 116 static void
 117 adjust_debug_stmts_now (adjust_info *ai)
 118 {
 119   basic_block bbphi = ai->bb;
 120   tree orig_def = ai->from;
 121   tree new_def = ai->to;
 122   imm_use_iterator imm_iter;
 123   gimple stmt;
 124   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 125
 126   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 127
 128   /* Adjust any debug stmts that held onto non-loop-closed
 129      references.  */
 130   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 131     {
 132       use_operand_p use_p;
 133       basic_block bbuse;
 134
 135       if (!is_gimple_debug (stmt))
 136         continue;
 137
 138       gcc_assert (gimple_debug_bind_p (stmt));
 139
 140       bbuse = gimple_bb (stmt);
 141
 142       if ((bbuse == bbphi
 143            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 144           && !(bbuse == bbdef
 145                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 146         {
 147           if (new_def)
 148             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 149               SET_USE (use_p, new_def);
 150           else
 151             {
 152               gimple_debug_bind_reset_value (stmt);
 153               update_stmt (stmt);
 154             }
 155         }
 156     }
 157 }
 158
 159 /* Adjust debug stmts as scheduled before.  */
 160
 161 static void
 162 adjust_vec_debug_stmts (void)
 163 {
 164   if (!MAY_HAVE_DEBUG_STMTS)
 165     return;
 166
 167   gcc_assert (adjust_vec.exists ());
 168
 169   while (!adjust_vec.is_empty ())
 170     {
 171       adjust_debug_stmts_now (&adjust_vec.last ());
 172       adjust_vec.pop ();
 173     }
 174
 175   adjust_vec.release ();
 176 }
 177
 178 /* Adjust any debug stmts that referenced FROM values to use the
 179    loop-closed TO, if the references are dominated by BB and not by
 180    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 181    will be postponed until adjust_vec_debug_stmts is called.  */
 182
 183 static void
 184 adjust_debug_stmts (tree from, tree to, basic_block bb)
 185 {
 186   adjust_info ai;
 187
 188   if (MAY_HAVE_DEBUG_STMTS
 189       && TREE_CODE (from) == SSA_NAME
 190       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 191       && ! virtual_operand_p (from))
 192     {
 193       ai.from = from;
 194       ai.to = to;
 195       ai.bb = bb;
 196
 197       if (adjust_vec.exists ())
 198         adjust_vec.safe_push (ai);
 199       else
 200         adjust_debug_stmts_now (&ai);
 201     }
 202 }
 203
 204 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 205    to adjust any debug stmts that referenced the old phi arg,
 206    presumably non-loop-closed references left over from other
 207    transformations.  */
 208
 209 static void
 210 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
 211 {
 212   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 213
 214   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 215
 216   if (MAY_HAVE_DEBUG_STMTS)
 217     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 218                         gimple_bb (update_phi));
 219 }
 220
 221
 222 /* Update PHI nodes for a guard of the LOOP.
 223
 224    Input:
 225    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 226         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 227         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 228         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 229         We denote this bb NEW_MERGE_BB because before the guard code was added
 230         it had a single predecessor (the LOOP header), and now it became a merge
 231         point of two paths - the path that ends with the LOOP exit-edge, and
 232         the path that ends with GUARD_EDGE.
 233    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 234         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 235
 236    ===> The CFG before the guard-code was added:
 237         LOOP_header_bb:
 238           loop_body
 239           if (exit_loop) goto update_bb
 240           else           goto LOOP_header_bb
 241         update_bb:
 242
 243    ==> The CFG after the guard-code was added:
 244         guard_bb:
 245           if (LOOP_guard_condition) goto new_merge_bb
 246           else                      goto LOOP_header_bb
 247         LOOP_header_bb:
 248           loop_body
 249           if (exit_loop_condition) goto new_merge_bb
 250           else                     goto LOOP_header_bb
 251         new_merge_bb:
 252           goto update_bb
 253         update_bb:
 254
 255    ==> The CFG after this function:
 256         guard_bb:
 257           if (LOOP_guard_condition) goto new_merge_bb
 258           else                      goto LOOP_header_bb
 259         LOOP_header_bb:
 260           loop_body
 261           if (exit_loop_condition) goto new_exit_bb
 262           else                     goto LOOP_header_bb
 263         new_exit_bb:
 264         new_merge_bb:
 265           goto update_bb
 266         update_bb:
 267
 268    This function:
 269    1. creates and updates the relevant phi nodes to account for the new
 270       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 271       1.1. Create phi nodes at NEW_MERGE_BB.
 272       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
  273            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
                 was added.
 274    2. preserves loop-closed-ssa-form by creating the required phi nodes
  275       at the exit of LOOP (i.e., in NEW_EXIT_BB).
 276
 277    There are two flavors to this function:
 278
 279    slpeel_update_phi_nodes_for_guard1:
 280      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 281      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 282      for variables that have phis in the loop header.
 283
 284    slpeel_update_phi_nodes_for_guard2:
 285      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 286      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 287      for variables that have phis in the loop exit.
 288
 289    I.E., the overall structure is:
 290
 291         loop1_preheader_bb:
 292                 guard1 (goto loop1/merge1_bb)
 293         loop1
 294         loop1_exit_bb:
 295                 guard2 (goto merge1_bb/merge2_bb)
 296         merge1_bb
 297         loop2
 298         loop2_exit_bb
 299         merge2_bb
 300         next_bb
 301
 302    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 303    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 304    that have phis in loop1->header).
 305
 306    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 307    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 308    that have phis in next_bb). It also adds some of these phis to
 309    loop1_exit_bb.
 310
 311    slpeel_update_phi_nodes_for_guard1 is always called before
 312    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 313    to create correct data-flow and loop-closed-ssa-form.
 314
 315    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 316    that change between iterations of a loop (and therefore have a phi-node
 317    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 318    phis for variables that are used out of the loop (and therefore have
 319    loop-closed exit phis). Some variables may be both updated between
 320    iterations and used after the loop. This is why in loop1_exit_bb we
 321    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 322    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 323
 324    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 325      an original loop. i.e., we have:
 326
 327            orig_loop
 328            guard_bb (goto LOOP/new_merge)
 329            new_loop <-- LOOP
 330            new_exit
 331            new_merge
 332            next_bb
 333
 334      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 335      have:
 336
 337            new_loop
 338            guard_bb (goto LOOP/new_merge)
 339            orig_loop <-- LOOP
 340            new_exit
 341            new_merge
 342            next_bb
 343
 344      The SSA names defined in the original loop have a current
  345      reaching definition that records the corresponding new
 346      ssa-name used in the new duplicated loop copy.
 347   */
 348
 349 /* Function slpeel_update_phi_nodes_for_guard1
 350
 351    Input:
 352    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 353    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 354             information.
 355
 356    In the context of the overall structure, we have:
 357
 358         loop1_preheader_bb:
 359                 guard1 (goto loop1/merge1_bb)
 360 LOOP->  loop1
 361         loop1_exit_bb:
 362                 guard2 (goto merge1_bb/merge2_bb)
 363         merge1_bb
 364         loop2
 365         loop2_exit_bb
 366         merge2_bb
 367         next_bb
 368
 369    For each name updated between loop iterations (i.e - for each name that has
 370    an entry (loop-header) phi in LOOP) we create a new phi in:
 371    1. merge1_bb (to account for the edge from guard1)
 372    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 373 */
 374
 375 static void
 376 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 377                                     bool is_new_loop, basic_block *new_exit_bb)
 378 {
 379   gimple orig_phi, new_phi;
 380   gimple update_phi, update_phi2;
 381   tree guard_arg, loop_arg;
 382   basic_block new_merge_bb = guard_edge->dest;
 383   edge e = EDGE_SUCC (new_merge_bb, 0);
 384   basic_block update_bb = e->dest;
 385   basic_block orig_bb = loop->header;
 386   edge new_exit_e;
 387   tree current_new_name;
 388   gimple_stmt_iterator gsi_orig, gsi_update;
 389
 390   /* Create new bb between loop and new_merge_bb.  */
 391   *new_exit_bb = split_edge (single_exit (loop));
 392
 393   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 394
 395   for (gsi_orig = gsi_start_phis (orig_bb),
 396        gsi_update = gsi_start_phis (update_bb);
 397        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
 398        gsi_next (&gsi_orig), gsi_next (&gsi_update))
 399     {
 400       source_location loop_locus, guard_locus;
 401       tree new_res;
 402       orig_phi = gsi_stmt (gsi_orig);
 403       update_phi = gsi_stmt (gsi_update);
 404
 405       /** 1. Handle new-merge-point phis  **/
 406
 407       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 408       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 409       new_phi = create_phi_node (new_res, new_merge_bb);
 410
 411       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 412             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 413       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 414       loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
 415                                                       EDGE_SUCC (loop->latch,
 416                                                                  0));
 417       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 418       guard_locus
 419         = gimple_phi_arg_location_from_edge (orig_phi,
 420                                              loop_preheader_edge (loop));
 421
 422       add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
 423       add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
 424
 425       /* 1.3. Update phi in successor block.  */
 426       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 427                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 428       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 429       update_phi2 = new_phi;
 430
 431
 432       /** 2. Handle loop-closed-ssa-form phis  **/
 433
 434       if (virtual_operand_p (PHI_RESULT (orig_phi)))
 435         continue;
 436
 437       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 438       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 439       new_phi = create_phi_node (new_res, *new_exit_bb);
 440
 441       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 442       add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
 443
 444       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 445       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 446       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 447                                   PHI_RESULT (new_phi));
 448
 449       /* 2.4. Record the newly created name with set_current_def.
 450          We want to find a name such that
 451                 name = get_current_def (orig_loop_name)
 452          and to set its current definition as follows:
 453                 set_current_def (name, new_phi_name)
 454
 455          If LOOP is a new loop then loop_arg is already the name we're
 456          looking for. If LOOP is the original loop, then loop_arg is
 457          the orig_loop_name and the relevant name is recorded in its
 458          current reaching definition.  */
 459       if (is_new_loop)
 460         current_new_name = loop_arg;
 461       else
 462         {
 463           current_new_name = get_current_def (loop_arg);
 464           /* current_def is not available only if the variable does not
 465              change inside the loop, in which case we also don't care
 466              about recording a current_def for it because we won't be
 467              trying to create loop-exit-phis for it.  */
 468           if (!current_new_name)
 469             continue;
 470         }
 471       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 472
 473       set_current_def (current_new_name, PHI_RESULT (new_phi));
 474     }
 475 }
 476
 477
 478 /* Function slpeel_update_phi_nodes_for_guard2
 479
 480    Input:
 481    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 482
 483    In the context of the overall structure, we have:
 484
 485         loop1_preheader_bb:
 486                 guard1 (goto loop1/merge1_bb)
 487         loop1
 488         loop1_exit_bb:
 489                 guard2 (goto merge1_bb/merge2_bb)
 490         merge1_bb
 491 LOOP->  loop2
 492         loop2_exit_bb
 493         merge2_bb
 494         next_bb
 495
 496    For each name used out side the loop (i.e - for each name that has an exit
 497    phi in next_bb) we create a new phi in:
 498    1. merge2_bb (to account for the edge from guard_bb)
 499    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 500    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 501       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 502 */
 503
 504 static void
 505 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 506                                     bool is_new_loop, basic_block *new_exit_bb)
 507 {
 508   gimple orig_phi, new_phi;
 509   gimple update_phi, update_phi2;
 510   tree guard_arg, loop_arg;
 511   basic_block new_merge_bb = guard_edge->dest;
 512   edge e = EDGE_SUCC (new_merge_bb, 0);
 513   basic_block update_bb = e->dest;
 514   edge new_exit_e;
 515   tree orig_def, orig_def_new_name;
 516   tree new_name, new_name2;
 517   tree arg;
 518   gimple_stmt_iterator gsi;
 519
 520   /* Create new bb between loop and new_merge_bb.  */
 521   *new_exit_bb = split_edge (single_exit (loop));
 522
 523   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 524
 525   for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
 526     {
 527       tree new_res;
 528       update_phi = gsi_stmt (gsi);
 529       orig_phi = update_phi;
 530       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 531       /* This loop-closed-phi actually doesn't represent a use
 532          out of the loop - the phi arg is a constant.  */
 533       if (TREE_CODE (orig_def) != SSA_NAME)
 534         continue;
 535       orig_def_new_name = get_current_def (orig_def);
 536       arg = NULL_TREE;
 537
 538       /** 1. Handle new-merge-point phis  **/
 539
 540       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 541       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 542       new_phi = create_phi_node (new_res, new_merge_bb);
 543
 544       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 545             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 546       new_name = orig_def;
 547       new_name2 = NULL_TREE;
 548       if (orig_def_new_name)
 549         {
 550           new_name = orig_def_new_name;
 551           /* Some variables have both loop-entry-phis and loop-exit-phis.
 552              Such variables were given yet newer names by phis placed in
 553              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 554              new_name2 = get_current_def (get_current_def (orig_name)).  */
 555           new_name2 = get_current_def (new_name);
 556         }
 557
 558       if (is_new_loop)
 559         {
 560           guard_arg = orig_def;
 561           loop_arg = new_name;
 562         }
 563       else
 564         {
 565           guard_arg = new_name;
 566           loop_arg = orig_def;
 567         }
 568       if (new_name2)
 569         guard_arg = new_name2;
 570
 571       add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
 572       add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
 573
 574       /* 1.3. Update phi in successor block.  */
 575       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 576       adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
 577       update_phi2 = new_phi;
 578
 579
 580       /** 2. Handle loop-closed-ssa-form phis  **/
 581
 582       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 583       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 584       new_phi = create_phi_node (new_res, *new_exit_bb);
 585
 586       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 587       add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
 588
 589       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 590       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 591       adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
 592                                   PHI_RESULT (new_phi));
 593
 594
 595       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 596
 597       /* 3.1. Find the relevant names that need an exit-phi in
 598          GUARD_BB, i.e. names for which
 599          slpeel_update_phi_nodes_for_guard1 had not already created a
 600          phi node. This is the case for names that are used outside
 601          the loop (and therefore need an exit phi) but are not updated
 602          across loop iterations (and therefore don't have a
 603          loop-header-phi).
 604
 605          slpeel_update_phi_nodes_for_guard1 is responsible for
 606          creating loop-exit phis in GUARD_BB for names that have a
 607          loop-header-phi.  When such a phi is created we also record
 608          the new name in its current definition.  If this new name
 609          exists, then guard_arg was set to this new name (see 1.2
 610          above).  Therefore, if guard_arg is not this new name, this
 611          is an indication that an exit-phi in GUARD_BB was not yet
 612          created, so we take care of it here.  */
 613       if (guard_arg == new_name2)
 614         continue;
 615       arg = guard_arg;
 616
 617       /* 3.2. Generate new phi node in GUARD_BB:  */
 618       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
 619       new_phi = create_phi_node (new_res, guard_edge->src);
 620
 621       /* 3.3. GUARD_BB has one incoming edge:  */
 622       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 623       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
 624                    UNKNOWN_LOCATION);
 625
 626       /* 3.4. Update phi in successor of GUARD_BB:  */
 627       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 628                                                                 == guard_arg);
 629       adjust_phi_and_debug_stmts (update_phi2, guard_edge,
 630                                   PHI_RESULT (new_phi));
 631     }
 632 }
 633
 634
 635 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 636    that starts at zero, increases by one and its limit is NITERS.
 637
 638    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 639
 640 void
 641 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 642 {
 643   tree indx_before_incr, indx_after_incr;
 644   gimple cond_stmt;
 645   gimple orig_cond;
 646   edge exit_edge = single_exit (loop);
 647   gimple_stmt_iterator loop_cond_gsi;
 648   gimple_stmt_iterator incr_gsi;
 649   bool insert_after;
 650   tree init = build_int_cst (TREE_TYPE (niters), 0);
 651   tree step = build_int_cst (TREE_TYPE (niters), 1);
 652   LOC loop_loc;
 653   enum tree_code code;
 654
 655   orig_cond = get_loop_exit_condition (loop);
 656   gcc_assert (orig_cond);
 657   loop_cond_gsi = gsi_for_stmt (orig_cond);
 658
 659   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 660   create_iv (init, step, NULL_TREE, loop,
 661              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 662
 663   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 664                                               true, NULL_TREE, true,
 665                                               GSI_SAME_STMT);
 666   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 667                                      true, GSI_SAME_STMT);
 668
 669   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 670   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 671                                  NULL_TREE);
 672
 673   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 674
 675   /* Remove old loop exit test:  */
 676   gsi_remove (&loop_cond_gsi, true);
 677   free_stmt_vec_info (orig_cond);
 678
 679   loop_loc = find_loop_location (loop);
 680   if (dump_enabled_p ())
 681     {
 682       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
 683         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
 684                      LOC_LINE (loop_loc));
 685       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 686       dump_printf (MSG_NOTE, "\n");
 687     }
 688   loop->nb_iterations = niters;
 689 }
 690
 691
 692 /* Given LOOP this function generates a new copy of it and puts it
 693    on E which is either the entry or exit of LOOP.  */
 694
 695 struct loop *
 696 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 697 {
 698   struct loop *new_loop;
 699   basic_block *new_bbs, *bbs;
 700   bool at_exit;
 701   bool was_imm_dom;
 702   basic_block exit_dest;
 703   edge exit, new_exit;
 704
 705   exit = single_exit (loop);
 706   at_exit = (e == exit);
 707   if (!at_exit && e != loop_preheader_edge (loop))
 708     return NULL;
 709
 710   bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 711   get_loop_body_with_size (loop, bbs, loop->num_nodes);
 712
 713   /* Check whether duplication is possible.  */
 714   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 715     {
 716       free (bbs);
 717       return NULL;
 718     }
 719
 720   /* Generate new loop structure.  */
 721   new_loop = duplicate_loop (loop, loop_outer (loop));
 722   duplicate_subloops (loop, new_loop);
 723
 724   exit_dest = exit->dest;
 725   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 726                                           exit_dest) == loop->header ?
 727                  true : false);
 728
 729   /* Also copy the pre-header, this avoids jumping through hoops to
 730      duplicate the loop entry PHI arguments.  Create an empty
 731      pre-header unconditionally for this.  */
 732   basic_block preheader = split_edge (loop_preheader_edge (loop));
 733   edge entry_e = single_pred_edge (preheader);
 734   bbs[loop->num_nodes] = preheader;
 735   new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
 736
 737   copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
 738             &exit, 1, &new_exit, NULL,
 739             e->src, true);
 740   basic_block new_preheader = new_bbs[loop->num_nodes];
 741
 742   add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
 743
 744   if (at_exit) /* Add the loop copy at exit.  */
 745     {
 746       redirect_edge_and_branch_force (e, new_preheader);
 747       flush_pending_stmts (e);
 748       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 749       if (was_imm_dom)
 750         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 751
 752       /* And remove the non-necessary forwarder again.  Keep the other
 753          one so we have a proper pre-header for the loop at the exit edge.  */
 754       redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
 755       delete_basic_block (preheader);
 756       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 757                                loop_preheader_edge (loop)->src);
 758     }
 759   else /* Add the copy at entry.  */
 760     {
 761       redirect_edge_and_branch_force (entry_e, new_preheader);
 762       flush_pending_stmts (entry_e);
 763       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 764
 765       redirect_edge_and_branch_force (new_exit, preheader);
 766       flush_pending_stmts (new_exit);
 767       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 768
 769       /* And remove the non-necessary forwarder again.  Keep the other
 770          one so we have a proper pre-header for the loop at the exit edge.  */
 771       redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
 772       delete_basic_block (new_preheader);
 773       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 774                                loop_preheader_edge (new_loop)->src);
 775     }
 776
 777   for (unsigned i = 0; i < loop->num_nodes+1; i++)
 778     rename_variables_in_bb (new_bbs[i]);
 779
 780   free (new_bbs);
 781   free (bbs);
 782
 783 #ifdef ENABLE_CHECKING
 784   verify_dominators (CDI_DOMINATORS);
 785 #endif
 786
 787   return new_loop;
 788 }
 789
 790
 791 /* Given the condition statement COND, put it as the last statement
 792    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 793    Assumes that this is the single exit of the guarded loop.
 794    Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
 795
 796 static edge
 797 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 798                        gimple_seq cond_expr_stmt_list,
 799                        basic_block exit_bb, basic_block dom_bb,
 800                        int probability)
 801 {
 802   gimple_stmt_iterator gsi;
 803   edge new_e, enter_e;
 804   gimple cond_stmt;
 805   gimple_seq gimplify_stmt_list = NULL;
 806
 807   enter_e = EDGE_SUCC (guard_bb, 0);
 808   enter_e->flags &= ~EDGE_FALLTHRU;
 809   enter_e->flags |= EDGE_FALSE_VALUE;
 810   gsi = gsi_last_bb (guard_bb);
 811
 812   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 813                                  NULL_TREE);
 814   if (gimplify_stmt_list)
 815     gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 816   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 817   if (cond_expr_stmt_list)
 818     gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
 819
 820   gsi = gsi_last_bb (guard_bb);
 821   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 822
 823   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 824   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 825
 826   new_e->count = guard_bb->count;
 827   new_e->probability = probability;
 828   new_e->count = apply_probability (enter_e->count, probability);
 829   enter_e->count -= new_e->count;
 830   enter_e->probability = inverse_probability (probability);
 831   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 832   return new_e;
 833 }
 834
 835
 836 /* This function verifies that the following restrictions apply to LOOP:
 837    (1) it is innermost
 838    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 839    (3) it is single entry, single exit
 840    (4) its exit condition is the last stmt in the header
 841    (5) E is the entry/exit edge of LOOP.
 842  */
 843
 844 bool
 845 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 846 {
 847   edge exit_e = single_exit (loop);
 848   edge entry_e = loop_preheader_edge (loop);
 849   gimple orig_cond = get_loop_exit_condition (loop);
 850   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 851
 852   if (loop->inner
 853       /* All loops have an outer scope; the only case loop->outer is NULL is for
 854          the function itself.  */
 855       || !loop_outer (loop)
 856       || loop->num_nodes != 2
 857       || !empty_block_p (loop->latch)
 858       || !single_exit (loop)
 859       /* Verify that new loop exit condition can be trivially modified.  */
 860       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 861       || (e != exit_e && e != entry_e))
 862     return false;
 863
 864   return true;
 865 }
 866
 867 #ifdef ENABLE_CHECKING
 868 static void
 869 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 870                                  struct loop *second_loop)
 871 {
 872   basic_block loop1_exit_bb = single_exit (first_loop)->dest;
 873   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 874   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 875
 876   /* A guard that controls whether the second_loop is to be executed or skipped
 877      is placed in first_loop->exit.  first_loop->exit therefore has two
 878      successors - one is the preheader of second_loop, and the other is a bb
 879      after second_loop.
 880    */
 881   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 882
 883   /* 1. Verify that one of the successors of first_loop->exit is the preheader
 884         of second_loop.  */
 885
 886   /* The preheader of new_loop is expected to have two predecessors:
 887      first_loop->exit and the block that precedes first_loop.  */
 888
 889   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 890               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 891                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 892                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 893                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 894
 895   /* Verify that the other successor of first_loop->exit is after the
 896      second_loop.  */
 897   /* TODO */
 898 }
 899 #endif
 900
 901 /* If the run time cost model check determines that vectorization is
 902    not profitable and hence scalar loop should be generated then set
 903    FIRST_NITERS to prologue peeled iterations. This will allow all the
 904    iterations to be executed in the prologue peeled scalar loop.  */
 905
 906 static void
 907 set_prologue_iterations (basic_block bb_before_first_loop,
 908                          tree *first_niters,
 909                          struct loop *loop,
 910                          unsigned int th,
 911                          int probability)
 912 {
 913   edge e;
 914   basic_block cond_bb, then_bb;
 915   tree var, prologue_after_cost_adjust_name;
 916   gimple_stmt_iterator gsi;
 917   gimple newphi;
 918   edge e_true, e_false, e_fallthru;
 919   gimple cond_stmt;
 920   gimple_seq stmts = NULL;
 921   tree cost_pre_condition = NULL_TREE;
 922   tree scalar_loop_iters =
 923     unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
 924
 925   e = single_pred_edge (bb_before_first_loop);
 926   cond_bb = split_edge(e);
 927
 928   e = single_pred_edge (bb_before_first_loop);
 929   then_bb = split_edge(e);
 930   set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
 931
 932   e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
 933                                    EDGE_FALSE_VALUE);
 934   set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
 935
 936   e_true = EDGE_PRED (then_bb, 0);
 937   e_true->flags &= ~EDGE_FALLTHRU;
 938   e_true->flags |= EDGE_TRUE_VALUE;
 939
 940   e_true->probability = probability;
 941   e_false->probability = inverse_probability (probability);
 942   e_true->count = apply_probability (cond_bb->count, probability);
 943   e_false->count = cond_bb->count - e_true->count;
 944   then_bb->frequency = EDGE_FREQUENCY (e_true);
 945   then_bb->count = e_true->count;
 946
 947   e_fallthru = EDGE_SUCC (then_bb, 0);
 948   e_fallthru->count = then_bb->count;
 949
 950   gsi = gsi_last_bb (cond_bb);
 951   cost_pre_condition =
 952     fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
 953                  build_int_cst (TREE_TYPE (scalar_loop_iters), th));
 954   cost_pre_condition =
 955     force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
 956                                 NULL_TREE, false, GSI_CONTINUE_LINKING);
 957   cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
 958                                            NULL_TREE, NULL_TREE);
 959   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 960
 961   var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
 962                         "prologue_after_cost_adjust");
 963   prologue_after_cost_adjust_name =
 964     force_gimple_operand (scalar_loop_iters, &stmts, false, var);
 965
 966   gsi = gsi_last_bb (then_bb);
 967   if (stmts)
 968     gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
 969
 970   newphi = create_phi_node (var, bb_before_first_loop);
 971   add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
 972                UNKNOWN_LOCATION);
 973   add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
 974
 975   *first_niters = PHI_RESULT (newphi);
 976 }
 977
 978 /* Function slpeel_tree_peel_loop_to_edge.
 979
 980    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 981    that is placed on the entry (exit) edge E of LOOP. After this transformation
 982    we have two loops one after the other - first-loop iterates FIRST_NITERS
 983    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 984    If the cost model indicates that it is profitable to emit a scalar
 985    loop instead of the vector one, then the prolog (epilog) loop will iterate
 986    for the entire unchanged scalar iterations of the loop.
 987
 988    Input:
 989    - LOOP: the loop to be peeled.
 990    - E: the exit or entry edge of LOOP.
 991         If it is the entry edge, we peel the first iterations of LOOP. In this
 992         case first-loop is LOOP, and second-loop is the newly created loop.
 993         If it is the exit edge, we peel the last iterations of LOOP. In this
 994         case, first-loop is the newly created loop, and second-loop is LOOP.
 995    - NITERS: the number of iterations that LOOP iterates.
 996    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 997    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 998         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 999         is false, the caller of this function may want to take care of this
1000         (this can be useful if we don't want new stmts added to first-loop).
1001    - TH: cost model profitability threshold of iterations for vectorization.
1002    - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1003                           during versioning and hence needs to occur during
1004                           prologue generation or whether cost model check
1005                           has not occurred during prologue generation and hence
1006                           needs to occur during epilogue generation.
1007    - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1008    - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1009
1010
1011    Output:
1012    The function returns a pointer to the new loop-copy, or NULL if it failed
1013    to perform the transformation.
1014
1015    The function generates two if-then-else guards: one before the first loop,
1016    and the other before the second loop:
1017    The first guard is:
1018      if (FIRST_NITERS == 0) then skip the first loop,
1019      and go directly to the second loop.
1020    The second guard is:
1021      if (FIRST_NITERS == NITERS) then skip the second loop.
1022
1023    If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1024    then the generated condition is combined with COND_EXPR and the
1025    statements in COND_EXPR_STMT_LIST are emitted together with it.
1026
1027    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1028    FORNOW the resulting code will not be in loop-closed-ssa form.
1029 */
1030
1031 static struct loop*
1032 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1033                                edge e, tree *first_niters,
1034                                tree niters, bool update_first_loop_count,
1035                                unsigned int th, bool check_profitability,
1036                                tree cond_expr, gimple_seq cond_expr_stmt_list,
1037                                int bound1, int bound2)
1038 {
1039   struct loop *new_loop = NULL, *first_loop, *second_loop;
1040   edge skip_e;
1041   tree pre_condition = NULL_TREE;
1042   basic_block bb_before_second_loop, bb_after_second_loop;
1043   basic_block bb_before_first_loop;
1044   basic_block bb_between_loops;
1045   basic_block new_exit_bb;
1046   gimple_stmt_iterator gsi;
1047   edge exit_e = single_exit (loop);
1048   LOC loop_loc;
1049   tree cost_pre_condition = NULL_TREE;
1050   /* There are many aspects to how likely the first loop is going to be executed.
1051      Without histogram we can't really do good job.  Simply set it to
1052      2/3, so the first loop is not reordered to the end of function and
1053      the hot path through stays short.  */
1054   int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1055   int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1056   int probability_of_second_loop;
1057
1058   if (!slpeel_can_duplicate_loop_p (loop, e))
1059     return NULL;
1060
1061   /* We might have a queued need to update virtual SSA form.  As we
1062      delete the update SSA machinery below after doing a regular
1063      incremental SSA update during loop copying make sure we don't
1064      lose that fact.
1065      ???  Needing to update virtual SSA form by renaming is unfortunate
1066      but not all of the vectorizer code inserting new loads / stores
1067      properly assigns virtual operands to those statements.  */
1068   update_ssa (TODO_update_ssa_only_virtuals);
1069
1070   /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1071      in the exit bb and rename all the uses after the loop.  This simplifies
1072      the *guard[12] routines, which assume loop closed SSA form for all PHIs
1073      (but normally loop closed SSA form doesn't require virtual PHIs to be
1074      in the same form).  Doing this early simplifies the checking what
1075      uses should be renamed.  */
1076   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1077     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1078       {
1079         gimple phi = gsi_stmt (gsi);
1080         for (gsi = gsi_start_phis (exit_e->dest);
1081              !gsi_end_p (gsi); gsi_next (&gsi))
1082           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1083             break;
1084         if (gsi_end_p (gsi))
1085           {
1086             tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1087             gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1088             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1089             imm_use_iterator imm_iter;
1090             gimple stmt;
1091             use_operand_p use_p;
1092
1093             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1094             gimple_phi_set_result (new_phi, new_vop);
1095             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1096               if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1097                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1098                   SET_USE (use_p, new_vop);
1099           }
1100         break;
1101       }
1102
1103   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1104         Resulting CFG would be:
1105
1106         first_loop:
1107         do {
1108         } while ...
1109
1110         second_loop:
1111         do {
1112         } while ...
1113
1114         orig_exit_bb:
1115    */
1116
1117   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1118     {
1119       loop_loc = find_loop_location (loop);
1120       dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1121                        "tree_duplicate_loop_to_edge_cfg failed.\n");
1122       return NULL;
1123     }
1124
1125   if (MAY_HAVE_DEBUG_STMTS)
1126     {
1127       gcc_assert (!adjust_vec.exists ());
1128       vec_stack_alloc (adjust_info, adjust_vec, 32);
1129     }
1130
1131   if (e == exit_e)
1132     {
1133       /* NEW_LOOP was placed after LOOP.  */
1134       first_loop = loop;
1135       second_loop = new_loop;
1136     }
1137   else
1138     {
1139       /* NEW_LOOP was placed before LOOP.  */
1140       first_loop = new_loop;
1141       second_loop = loop;
1142     }
1143
1144   /* 2.  Add the guard code in one of the following ways:
1145
1146      2.a Add the guard that controls whether the first loop is executed.
1147          This occurs when this function is invoked for prologue or epilogue
1148          generation and when the cost model check can be done at compile time.
1149
1150          Resulting CFG would be:
1151
1152          bb_before_first_loop:
1153          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1154                                 GOTO first-loop
1155
1156          first_loop:
1157          do {
1158          } while ...
1159
1160          bb_before_second_loop:
1161
1162          second_loop:
1163          do {
1164          } while ...
1165
1166          orig_exit_bb:
1167
1168      2.b Add the cost model check that allows the prologue
1169          to iterate for the entire unchanged scalar
1170          iterations of the loop in the event that the cost
1171          model indicates that the scalar loop is more
1172          profitable than the vector one. This occurs when
1173          this function is invoked for prologue generation
1174          and the cost model check needs to be done at run
1175          time.
1176
1177          Resulting CFG after prologue peeling would be:
1178
1179          if (scalar_loop_iterations <= th)
1180            FIRST_NITERS = scalar_loop_iterations
1181
1182          bb_before_first_loop:
1183          if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1184                                 GOTO first-loop
1185
1186          first_loop:
1187          do {
1188          } while ...
1189
1190          bb_before_second_loop:
1191
1192          second_loop:
1193          do {
1194          } while ...
1195
1196          orig_exit_bb:
1197
1198      2.c Add the cost model check that allows the epilogue
1199          to iterate for the entire unchanged scalar
1200          iterations of the loop in the event that the cost
1201          model indicates that the scalar loop is more
1202          profitable than the vector one. This occurs when
1203          this function is invoked for epilogue generation
1204          and the cost model check needs to be done at run
1205          time.  This check is combined with any pre-existing
1206          check in COND_EXPR to avoid versioning.
1207
1208          Resulting CFG after prologue peeling would be:
1209
1210          bb_before_first_loop:
1211          if ((scalar_loop_iterations <= th)
1212              ||
1213              FIRST_NITERS == 0) GOTO bb_before_second_loop
1214                                 GOTO first-loop
1215
1216          first_loop:
1217          do {
1218          } while ...
1219
1220          bb_before_second_loop:
1221
1222          second_loop:
1223          do {
1224          } while ...
1225
1226          orig_exit_bb:
1227   */
1228
1229   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1230   /* Loop copying insterted a forwarder block for us here.  */
1231   bb_before_second_loop = single_exit (first_loop)->dest;
1232
1233   probability_of_second_loop = (inverse_probability (first_guard_probability)
1234                                 + combine_probabilities (second_guard_probability,
1235                                                          first_guard_probability));
1236   /* Theoretically preheader edge of first loop and exit edge should have
1237      same frequencies.  Loop exit probablities are however easy to get wrong.
1238      It is safer to copy value from original loop entry.  */
1239   bb_before_second_loop->frequency
1240      = combine_probabilities (bb_before_first_loop->frequency,
1241                               probability_of_second_loop);
1242   bb_before_second_loop->count
1243      = apply_probability (bb_before_first_loop->count,
1244                           probability_of_second_loop);
1245   single_succ_edge (bb_before_second_loop)->count
1246      = bb_before_second_loop->count;
1247
1248   /* Epilogue peeling.  */
1249   if (!update_first_loop_count)
1250     {
1251       pre_condition =
1252         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1253                      build_int_cst (TREE_TYPE (*first_niters), 0));
1254       if (check_profitability)
1255         {
1256           tree scalar_loop_iters
1257             = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1258                                         (loop_vec_info_for_loop (loop)));
1259           cost_pre_condition =
1260             fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1261                          build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1262
1263           pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1264                                        cost_pre_condition, pre_condition);
1265         }
1266       if (cond_expr)
1267         {
1268           pre_condition =
1269             fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1270                          pre_condition,
1271                          fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1272                                       cond_expr));
1273         }
1274     }
1275
1276   /* Prologue peeling.  */
1277   else
1278     {
1279       if (check_profitability)
1280         set_prologue_iterations (bb_before_first_loop, first_niters,
1281                                  loop, th, first_guard_probability);
1282
1283       pre_condition =
1284         fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1285                      build_int_cst (TREE_TYPE (*first_niters), 0));
1286     }
1287
1288   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1289                                   cond_expr_stmt_list,
1290                                   bb_before_second_loop, bb_before_first_loop,
1291                                   inverse_probability (first_guard_probability));
1292   scale_loop_profile (first_loop, first_guard_probability,
1293                       check_profitability && (int)th > bound1 ? th : bound1);
1294   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1295                                       first_loop == new_loop,
1296                                       &new_exit_bb);
1297
1298
1299   /* 3. Add the guard that controls whether the second loop is executed.
1300         Resulting CFG would be:
1301
1302         bb_before_first_loop:
1303         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1304                                GOTO first-loop
1305
1306         first_loop:
1307         do {
1308         } while ...
1309
1310         bb_between_loops:
1311         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1312                                     GOTO bb_before_second_loop
1313
1314         bb_before_second_loop:
1315
1316         second_loop:
1317         do {
1318         } while ...
1319
1320         bb_after_second_loop:
1321
1322         orig_exit_bb:
1323    */
1324
1325   bb_between_loops = new_exit_bb;
1326   bb_after_second_loop = split_edge (single_exit (second_loop));
1327
1328   pre_condition =
1329         fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1330   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1331                                   bb_after_second_loop, bb_before_first_loop,
1332                                   inverse_probability (second_guard_probability));
1333   scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1334   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1335                                      second_loop == new_loop, &new_exit_bb);
1336
1337   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1338    */
1339   if (update_first_loop_count)
1340     slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1341
1342   delete_update_ssa ();
1343
1344   adjust_vec_debug_stmts ();
1345
1346   return new_loop;
1347 }
1348
1349 /* Function vect_get_loop_location.
1350
1351    Extract the location of the loop in the source code.
1352    If the loop is not well formed for vectorization, an estimated
1353    location is calculated.
1354    Return the loop location if succeed and NULL if not.  */
1355
1356 LOC
1357 find_loop_location (struct loop *loop)
1358 {
1359   gimple stmt = NULL;
1360   basic_block bb;
1361   gimple_stmt_iterator si;
1362
1363   if (!loop)
1364     return UNKNOWN_LOC;
1365
1366   stmt = get_loop_exit_condition (loop);
1367
1368   if (stmt
1369       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1370     return gimple_location (stmt);
1371
1372   /* If we got here the loop is probably not "well formed",
1373      try to estimate the loop location */
1374
1375   if (!loop->header)
1376     return UNKNOWN_LOC;
1377
1378   bb = loop->header;
1379
1380   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1381     {
1382       stmt = gsi_stmt (si);
1383       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1384         return gimple_location (stmt);
1385     }
1386
1387   return UNKNOWN_LOC;
1388 }
1389
1390
1391 /* This function builds ni_name = number of iterations loop executes
1392    on the loop preheader.  If SEQ is given the stmt is instead emitted
1393    there.  */
1394
1395 static tree
1396 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1397 {
1398   tree ni_name, var;
1399   gimple_seq stmts = NULL;
1400   edge pe;
1401   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1402   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1403
1404   var = create_tmp_var (TREE_TYPE (ni), "niters");
1405   ni_name = force_gimple_operand (ni, &stmts, false, var);
1406
1407   pe = loop_preheader_edge (loop);
1408   if (stmts)
1409     {
1410       if (seq)
1411         gimple_seq_add_seq (&seq, stmts);
1412       else
1413         {
1414           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1415           gcc_assert (!new_bb);
1416         }
1417     }
1418
1419   return ni_name;
1420 }
1421
1422
1423 /* This function generates the following statements:
1424
1425  ni_name = number of iterations loop executes
1426  ratio = ni_name / vf
1427  ratio_mult_vf_name = ratio * vf
1428
1429  and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1430  if that is non-NULL.  */
1431
1432 static void
1433 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1434                                  tree *ni_name_ptr,
1435                                  tree *ratio_mult_vf_name_ptr,
1436                                  tree *ratio_name_ptr,
1437                                  gimple_seq cond_expr_stmt_list)
1438 {
1439
1440   edge pe;
1441   basic_block new_bb;
1442   gimple_seq stmts;
1443   tree ni_name, ni_minus_gap_name;
1444   tree var;
1445   tree ratio_name;
1446   tree ratio_mult_vf_name;
1447   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1448   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1449   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1450   tree log_vf;
1451
1452   pe = loop_preheader_edge (loop);
1453
1454   /* Generate temporary variable that contains
1455      number of iterations loop executes.  */
1456
1457   ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1458   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1459
1460   /* If epilogue loop is required because of data accesses with gaps, we
1461      subtract one iteration from the total number of iterations here for
1462      correct calculation of RATIO.  */
1463   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1464     {
1465       ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1466                                        ni_name,
1467                                        build_one_cst (TREE_TYPE (ni_name)));
1468       if (!is_gimple_val (ni_minus_gap_name))
1469         {
1470           var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1471
1472           stmts = NULL;
1473           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1474                                                     true, var);
1475           if (cond_expr_stmt_list)
1476             gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1477           else
1478             {
1479               pe = loop_preheader_edge (loop);
1480               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1481               gcc_assert (!new_bb);
1482             }
1483         }
1484     }
1485   else
1486     ni_minus_gap_name = ni_name;
1487
1488   /* Create: ratio = ni >> log2(vf) */
1489
1490   ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1491                             ni_minus_gap_name, log_vf);
1492   if (!is_gimple_val (ratio_name))
1493     {
1494       var = create_tmp_var (TREE_TYPE (ni), "bnd");
1495
1496       stmts = NULL;
1497       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1498       if (cond_expr_stmt_list)
1499         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1500       else
1501         {
1502           pe = loop_preheader_edge (loop);
1503           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1504           gcc_assert (!new_bb);
1505         }
1506     }
1507
1508   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1509
1510   ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1511                                     ratio_name, log_vf);
1512   if (!is_gimple_val (ratio_mult_vf_name))
1513     {
1514       var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1515
1516       stmts = NULL;
1517       ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1518                                                  true, var);
1519       if (cond_expr_stmt_list)
1520         gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1521       else
1522         {
1523           pe = loop_preheader_edge (loop);
1524           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1525           gcc_assert (!new_bb);
1526         }
1527     }
1528
1529   *ni_name_ptr = ni_name;
1530   *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1531   *ratio_name_ptr = ratio_name;
1532
1533   return;
1534 }
1535
1536 /* Function vect_can_advance_ivs_p
1537
1538    In case the number of iterations that LOOP iterates is unknown at compile
1539    time, an epilog loop will be generated, and the loop induction variables
1540    (IVs) will be "advanced" to the value they are supposed to take just before
1541    the epilog loop.  Here we check that the access function of the loop IVs
1542    and the expression that represents the loop bound are simple enough.
1543    These restrictions will be relaxed in the future.  */
1544
1545 bool
1546 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1547 {
1548   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1549   basic_block bb = loop->header;
1550   gimple phi;
1551   gimple_stmt_iterator gsi;
1552
1553   /* Analyze phi functions of the loop header.  */
1554
1555   if (dump_enabled_p ())
1556     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
1557   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1558     {
1559       tree evolution_part;
1560
1561       phi = gsi_stmt (gsi);
1562       if (dump_enabled_p ())
1563         {
1564           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1565           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1566           dump_printf (MSG_NOTE, "\n");
1567         }
1568
1569       /* Skip virtual phi's. The data dependences that are associated with
1570          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1571
1572       if (virtual_operand_p (PHI_RESULT (phi)))
1573         {
1574           if (dump_enabled_p ())
1575             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1576                              "virtual phi. skip.\n");
1577           continue;
1578         }
1579
1580       /* Skip reduction phis.  */
1581
1582       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1583         {
1584           if (dump_enabled_p ())
1585             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1586                              "reduc phi. skip.\n");
1587           continue;
1588         }
1589
1590       /* Analyze the evolution function.  */
1591
1592       evolution_part
1593         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
1594       if (evolution_part == NULL_TREE)
1595         {
1596           if (dump_enabled_p ())
1597             dump_printf (MSG_MISSED_OPTIMIZATION,
1598                          "No access function or evolution.\n");
1599           return false;
1600         }
1601
1602       /* FORNOW: We do not transform initial conditions of IVs
1603          which evolution functions are a polynomial of degree >= 2.  */
1604
1605       if (tree_is_chrec (evolution_part))
1606         return false;
1607     }
1608
1609   return true;
1610 }
1611
1612
1613 /*   Function vect_update_ivs_after_vectorizer.
1614
1615      "Advance" the induction variables of LOOP to the value they should take
1616      after the execution of LOOP.  This is currently necessary because the
1617      vectorizer does not handle induction variables that are used after the
1618      loop.  Such a situation occurs when the last iterations of LOOP are
1619      peeled, because:
1620      1. We introduced new uses after LOOP for IVs that were not originally used
1621         after LOOP: the IVs of LOOP are now used by an epilog loop.
1622      2. LOOP is going to be vectorized; this means that it will iterate N/VF
1623         times, whereas the loop IVs should be bumped N times.
1624
1625      Input:
1626      - LOOP - a loop that is going to be vectorized. The last few iterations
1627               of LOOP were peeled.
1628      - NITERS - the number of iterations that LOOP executes (before it is
1629                 vectorized). i.e, the number of times the ivs should be bumped.
1630      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1631                   coming out from LOOP on which there are uses of the LOOP ivs
1632                   (this is the path from LOOP->exit to epilog_loop->preheader).
1633
1634                   The new definitions of the ivs are placed in LOOP->exit.
1635                   The phi args associated with the edge UPDATE_E in the bb
1636                   UPDATE_E->dest are updated accordingly.
1637
1638      Assumption 1: Like the rest of the vectorizer, this function assumes
1639      a single loop exit that has a single predecessor.
1640
1641      Assumption 2: The phi nodes in the LOOP header and in update_bb are
1642      organized in the same order.
1643
1644      Assumption 3: The access function of the ivs is simple enough (see
1645      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1646
1647      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1648      coming out of LOOP on which the ivs of LOOP are used (this is the path
1649      that leads to the epilog loop; other paths skip the epilog loop).  This
1650      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1651      needs to have its phis updated.
1652  */
1653
1654 static void
1655 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1656                                   edge update_e)
1657 {
1658   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1659   basic_block exit_bb = single_exit (loop)->dest;
1660   gimple phi, phi1;
1661   gimple_stmt_iterator gsi, gsi1;
1662   basic_block update_bb = update_e->dest;
1663
1664   /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1665
1666   /* Make sure there exists a single-predecessor exit bb:  */
1667   gcc_assert (single_pred_p (exit_bb));
1668
1669   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1670        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1671        gsi_next (&gsi), gsi_next (&gsi1))
1672     {
1673       tree init_expr;
1674       tree step_expr, off;
1675       tree type;
1676       tree var, ni, ni_name;
1677       gimple_stmt_iterator last_gsi;
1678       stmt_vec_info stmt_info;
1679
1680       phi = gsi_stmt (gsi);
1681       phi1 = gsi_stmt (gsi1);
1682       if (dump_enabled_p ())
1683         {
1684           dump_printf_loc (MSG_NOTE, vect_location,
1685                            "vect_update_ivs_after_vectorizer: phi: ");
1686           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1687           dump_printf (MSG_NOTE, "\n");
1688         }
1689
1690       /* Skip virtual phi's.  */
1691       if (virtual_operand_p (PHI_RESULT (phi)))
1692         {
1693           if (dump_enabled_p ())
1694             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1695                              "virtual phi. skip.\n");
1696           continue;
1697         }
1698
1699       /* Skip reduction phis.  */
1700       stmt_info = vinfo_for_stmt (phi);
1701       if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1702         {
1703           if (dump_enabled_p ())
1704             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1705                              "reduc phi. skip.\n");
1706           continue;
1707         }
1708
1709       type = TREE_TYPE (gimple_phi_result (phi));
1710       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1711       step_expr = unshare_expr (step_expr);
1712
1713       /* FORNOW: We do not support IVs whose evolution function is a polynomial
1714          of degree >= 2 or exponential.  */
1715       gcc_assert (!tree_is_chrec (step_expr));
1716
1717       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1718
1719       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1720                          fold_convert (TREE_TYPE (step_expr), niters),
1721                          step_expr);
1722       if (POINTER_TYPE_P (type))
1723         ni = fold_build_pointer_plus (init_expr, off);
1724       else
1725         ni = fold_build2 (PLUS_EXPR, type,
1726                           init_expr, fold_convert (type, off));
1727
1728       var = create_tmp_var (type, "tmp");
1729
1730       last_gsi = gsi_last_bb (exit_bb);
1731       ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1732                                           true, GSI_SAME_STMT);
1733
1734       /* Fix phi expressions in the successor bb.  */
1735       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1736     }
1737 }
1738
1739 /* Function vect_do_peeling_for_loop_bound
1740
1741    Peel the last iterations of the loop represented by LOOP_VINFO.
1742    The peeled iterations form a new epilog loop.  Given that the loop now
1743    iterates NITERS times, the new epilog loop iterates
1744    NITERS % VECTORIZATION_FACTOR times.
1745
1746    The original loop will later be made to iterate
1747    NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1748
1749    COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1750    test.  */
1751
1752 void
1753 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1754                                 unsigned int th, bool check_profitability)
1755 {
1756   tree ni_name, ratio_mult_vf_name;
1757   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1758   struct loop *new_loop;
1759   edge update_e;
1760   basic_block preheader;
1761   int loop_num;
1762   int max_iter;
1763   tree cond_expr = NULL_TREE;
1764   gimple_seq cond_expr_stmt_list = NULL;
1765
1766   if (dump_enabled_p ())
1767     dump_printf_loc (MSG_NOTE, vect_location,
1768                      "=== vect_do_peeling_for_loop_bound ===\n");
1769
1770   initialize_original_copy_tables ();
1771
1772   /* Generate the following variables on the preheader of original loop:
1773
1774      ni_name = number of iteration the original loop executes
1775      ratio = ni_name / vf
1776      ratio_mult_vf_name = ratio * vf  */
1777   vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1778                                    &ratio_mult_vf_name, ratio,
1779                                    cond_expr_stmt_list);
1780
1781   loop_num  = loop->num;
1782
1783   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1784                                             &ratio_mult_vf_name, ni_name, false,
1785                                             th, check_profitability,
1786                                             cond_expr, cond_expr_stmt_list,
1787                                             0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1788   gcc_assert (new_loop);
1789   gcc_assert (loop_num == loop->num);
1790 #ifdef ENABLE_CHECKING
1791   slpeel_verify_cfg_after_peeling (loop, new_loop);
1792 #endif
1793
1794   /* A guard that controls whether the new_loop is to be executed or skipped
1795      is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1796      is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1797      is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1798      is on the path where the LOOP IVs are used and need to be updated.  */
1799
1800   preheader = loop_preheader_edge (new_loop)->src;
1801   if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1802     update_e = EDGE_PRED (preheader, 0);
1803   else
1804     update_e = EDGE_PRED (preheader, 1);
1805
1806   /* Update IVs of original loop as if they were advanced
1807      by ratio_mult_vf_name steps.  */
1808   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1809
1810   /* For vectorization factor N, we need to copy last N-1 values in epilogue
1811      and this means N-2 loopback edge executions.
1812
1813      PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1814      will execute at least LOOP_VINFO_VECT_FACTOR times.  */
1815   max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1816               ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1817               : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1818   if (check_profitability)
1819     max_iter = MAX (max_iter, (int) th - 1);
1820   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1821   dump_printf (MSG_NOTE,
1822                "Setting upper bound of nb iterations for epilogue "
1823                "loop to %d\n", max_iter);
1824
1825   /* After peeling we have to reset scalar evolution analyzer.  */
1826   scev_reset ();
1827
1828   free_original_copy_tables ();
1829 }
1830
1831
1832 /* Function vect_gen_niters_for_prolog_loop
1833
1834    Set the number of iterations for the loop represented by LOOP_VINFO
1835    to the minimum between LOOP_NITERS (the original iteration count of the loop)
1836    and the misalignment of DR - the data reference recorded in
1837    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1838    this loop, the data reference DR will refer to an aligned location.
1839
1840    The following computation is generated:
1841
1842    If the misalignment of DR is known at compile time:
1843      addr_mis = int mis = DR_MISALIGNMENT (dr);
1844    Else, compute address misalignment in bytes:
1845      addr_mis = addr & (vectype_align - 1)
1846
1847    prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1848
1849    (elem_size = element type size; an element is the scalar element whose type
1850    is the inner type of the vectype)
1851
1852    When the step of the data-ref in the loop is not 1 (as in interleaved data
1853    and SLP), the number of iterations of the prolog must be divided by the step
1854    (which is equal to the size of interleaved group).
1855
1856    The above formulas assume that VF == number of elements in the vector. This
1857    may not hold when there are multiple-types in the loop.
1858    In this case, for some data-references in the loop the VF does not represent
1859    the number of elements that fit in the vector.  Therefore, instead of VF we
1860    use TYPE_VECTOR_SUBPARTS.  */
1861
1862 static tree
1863 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1864 {
1865   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1866   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1867   tree var;
1868   gimple_seq stmts;
1869   tree iters, iters_name;
1870   edge pe;
1871   basic_block new_bb;
1872   gimple dr_stmt = DR_STMT (dr);
1873   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1874   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1875   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1876   tree niters_type = TREE_TYPE (loop_niters);
1877   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1878
1879   pe = loop_preheader_edge (loop);
1880
1881   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1882     {
1883       int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1884
1885       if (dump_enabled_p ())
1886         dump_printf_loc (MSG_NOTE, vect_location,
1887                          "known peeling = %d.\n", npeel);
1888
1889       iters = build_int_cst (niters_type, npeel);
1890       *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1891     }
1892   else
1893     {
1894       gimple_seq new_stmts = NULL;
1895       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1896       tree offset = negative
1897           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1898       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1899                                                 &new_stmts, offset, loop);
1900       tree type = unsigned_type_for (TREE_TYPE (start_addr));
1901       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1902       HOST_WIDE_INT elem_size =
1903                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1904       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1905       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1906       tree nelements_tree = build_int_cst (type, nelements);
1907       tree byte_misalign;
1908       tree elem_misalign;
1909
1910       new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1911       gcc_assert (!new_bb);
1912
1913       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
1914       byte_misalign =
1915         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1916                      vectype_align_minus_1);
1917
1918       /* Create:  elem_misalign = byte_misalign / element_size  */
1919       elem_misalign =
1920         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1921
1922       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
1923       if (negative)
1924         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1925       else
1926         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1927       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1928       iters = fold_convert (niters_type, iters);
1929       *bound = nelements;
1930     }
1931
1932   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
1933   /* If the loop bound is known at compile time we already verified that it is
1934      greater than vf; since the misalignment ('iters') is at most vf, there's
1935      no need to generate the MIN_EXPR in this case.  */
1936   if (TREE_CODE (loop_niters) != INTEGER_CST)
1937     iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1938
1939   if (dump_enabled_p ())
1940     {
1941       dump_printf_loc (MSG_NOTE, vect_location,
1942                        "niters for prolog loop: ");
1943       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1944       dump_printf (MSG_NOTE, "\n");
1945     }
1946
1947   var = create_tmp_var (niters_type, "prolog_loop_niters");
1948   stmts = NULL;
1949   iters_name = force_gimple_operand (iters, &stmts, false, var);
1950
1951   /* Insert stmt on loop preheader edge.  */
1952   if (stmts)
1953     {
1954       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1955       gcc_assert (!new_bb);
1956     }
1957
1958   return iters_name;
1959 }
1960
1961
1962 /* Function vect_update_init_of_dr
1963
1964    NITERS iterations were peeled from LOOP.  DR represents a data reference
1965    in LOOP.  This function updates the information recorded in DR to
1966    account for the fact that the first NITERS iterations had already been
1967    executed.  Specifically, it updates the OFFSET field of DR.  */
1968
1969 static void
1970 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1971 {
1972   tree offset = DR_OFFSET (dr);
1973
1974   niters = fold_build2 (MULT_EXPR, sizetype,
1975                         fold_convert (sizetype, niters),
1976                         fold_convert (sizetype, DR_STEP (dr)));
1977   offset = fold_build2 (PLUS_EXPR, sizetype,
1978                         fold_convert (sizetype, offset), niters);
1979   DR_OFFSET (dr) = offset;
1980 }
1981
1982
1983 /* Function vect_update_inits_of_drs
1984
1985    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1986    This function updates the information recorded for the data references in
1987    the loop to account for the fact that the first NITERS iterations had
1988    already been executed.  Specifically, it updates the initial_condition of
1989    the access_function of all the data_references in the loop.  */
1990
1991 static void
1992 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1993 {
1994   unsigned int i;
1995   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1996   struct data_reference *dr;
1997
1998  if (dump_enabled_p ())
1999     dump_printf_loc (MSG_NOTE, vect_location,
2000                      "=== vect_update_inits_of_dr ===\n");
2001
2002   FOR_EACH_VEC_ELT (datarefs, i, dr)
2003     vect_update_init_of_dr (dr, niters);
2004 }
2005
2006
2007 /* Function vect_do_peeling_for_alignment
2008
2009    Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2010    'niters' is set to the misalignment of one of the data references in the
2011    loop, thereby forcing it to refer to an aligned location at the beginning
2012    of the execution of this loop.  The data reference for which we are
2013    peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2014
2015 void
2016 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
2017                                unsigned int th, bool check_profitability)
2018 {
2019   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2020   tree niters_of_prolog_loop, ni_name;
2021   tree n_iters;
2022   tree wide_prolog_niters;
2023   struct loop *new_loop;
2024   int max_iter;
2025   int bound = 0;
2026
2027   if (dump_enabled_p ())
2028     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2029                      "loop peeled for vectorization to enhance"
2030                      " alignment\n");
2031
2032   initialize_original_copy_tables ();
2033
2034   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2035   niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2036                                                            ni_name,
2037                                                            &bound);
2038
2039   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2040   new_loop =
2041     slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2042                                    &niters_of_prolog_loop, ni_name, true,
2043                                    th, check_profitability, NULL_TREE, NULL,
2044                                    bound,
2045                                    0);
2046
2047   gcc_assert (new_loop);
2048 #ifdef ENABLE_CHECKING
2049   slpeel_verify_cfg_after_peeling (new_loop, loop);
2050 #endif
2051   /* For vectorization factor N, we need to copy at most N-1 values
2052      for alignment and this means N-2 loopback edge executions.  */
2053   max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2054   if (check_profitability)
2055     max_iter = MAX (max_iter, (int) th - 1);
2056   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
2057   dump_printf (MSG_NOTE,
2058                "Setting upper bound of nb iterations for prologue "
2059                "loop to %d\n", max_iter);
2060
2061   /* Update number of times loop executes.  */
2062   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2063   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2064                 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2065
2066   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2067     wide_prolog_niters = niters_of_prolog_loop;
2068   else
2069     {
2070       gimple_seq seq = NULL;
2071       edge pe = loop_preheader_edge (loop);
2072       tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2073       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2074       wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2075                                                  var);
2076       if (seq)
2077         {
2078           /* Insert stmt on loop preheader edge.  */
2079           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2080           gcc_assert (!new_bb);
2081         }
2082     }
2083
2084   /* Update the init conditions of the access functions of all data refs.  */
2085   vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2086
2087   /* After peeling we have to reset scalar evolution analyzer.  */
2088   scev_reset ();
2089
2090   free_original_copy_tables ();
2091 }
2092
2093
2094 /* Function vect_create_cond_for_align_checks.
2095
2096    Create a conditional expression that represents the alignment checks for
2097    all of data references (array element references) whose alignment must be
2098    checked at runtime.
2099
2100    Input:
2101    COND_EXPR  - input conditional expression.  New conditions will be chained
2102                 with logical AND operation.
2103    LOOP_VINFO - two fields of the loop information are used.
2104                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2105                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2106
2107    Output:
2108    COND_EXPR_STMT_LIST - statements needed to construct the conditional
2109                          expression.
2110    The returned value is the conditional expression to be used in the if
2111    statement that controls which version of the loop gets executed at runtime.
2112
2113    The algorithm makes two assumptions:
2114      1) The number of bytes "n" in a vector is a power of 2.
2115      2) An address "a" is aligned if a%n is zero and that this
2116         test can be done as a&(n-1) == 0.  For example, for 16
2117         byte vectors the test is a&0xf == 0.  */
2118
2119 static void
2120 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2121                                    tree *cond_expr,
2122                                    gimple_seq *cond_expr_stmt_list)
2123 {
2124   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2125   vec<gimple> may_misalign_stmts
2126     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2127   gimple ref_stmt;
2128   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2129   tree mask_cst;
2130   unsigned int i;
2131   tree int_ptrsize_type;
2132   char tmp_name[20];
2133   tree or_tmp_name = NULL_TREE;
2134   tree and_tmp_name;
2135   gimple and_stmt;
2136   tree ptrsize_zero;
2137   tree part_cond_expr;
2138
2139   /* Check that mask is one less than a power of 2, i.e., mask is
2140      all zeros followed by all ones.  */
2141   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2142
2143   int_ptrsize_type = signed_type_for (ptr_type_node);
2144
2145   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2146      of the first vector of the i'th data reference. */
2147
2148   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2149     {
2150       gimple_seq new_stmt_list = NULL;
2151       tree addr_base;
2152       tree addr_tmp_name;
2153       tree new_or_tmp_name;
2154       gimple addr_stmt, or_stmt;
2155       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2156       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2157       bool negative = tree_int_cst_compare
2158         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2159       tree offset = negative
2160         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2161
2162       /* create: addr_tmp = (int)(address_of_first_vector) */
2163       addr_base =
2164         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2165                                               offset, loop);
2166       if (new_stmt_list != NULL)
2167         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2168
2169       sprintf (tmp_name, "addr2int%d", i);
2170       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2171       addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2172                                                 addr_base, NULL_TREE);
2173       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2174
2175       /* The addresses are OR together.  */
2176
2177       if (or_tmp_name != NULL_TREE)
2178         {
2179           /* create: or_tmp = or_tmp | addr_tmp */
2180           sprintf (tmp_name, "orptrs%d", i);
2181           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2182           or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2183                                                   new_or_tmp_name,
2184                                                   or_tmp_name, addr_tmp_name);
2185           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2186           or_tmp_name = new_or_tmp_name;
2187         }
2188       else
2189         or_tmp_name = addr_tmp_name;
2190
2191     } /* end for i */
2192
2193   mask_cst = build_int_cst (int_ptrsize_type, mask);
2194
2195   /* create: and_tmp = or_tmp & mask  */
2196   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2197
2198   and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2199                                            or_tmp_name, mask_cst);
2200   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2201
2202   /* Make and_tmp the left operand of the conditional test against zero.
2203      if and_tmp has a nonzero bit then some address is unaligned.  */
2204   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2205   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2206                                 and_tmp_name, ptrsize_zero);
2207   if (*cond_expr)
2208     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2209                               *cond_expr, part_cond_expr);
2210   else
2211     *cond_expr = part_cond_expr;
2212 }
2213
2214
2215 /* Function vect_vfa_segment_size.
2216
2217    Create an expression that computes the size of segment
2218    that will be accessed for a data reference.  The functions takes into
2219    account that realignment loads may access one more vector.
2220
2221    Input:
2222      DR: The data reference.
2223      LENGTH_FACTOR: segment length to consider.
2224
2225    Return an expression whose value is the size of segment which will be
2226    accessed by DR.  */
2227
2228 static tree
2229 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2230 {
2231   tree segment_length;
2232
2233   if (integer_zerop (DR_STEP (dr)))
2234     segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2235   else
2236     segment_length = size_binop (MULT_EXPR,
2237                                  fold_convert (sizetype, DR_STEP (dr)),
2238                                  fold_convert (sizetype, length_factor));
2239
2240   if (vect_supportable_dr_alignment (dr, false)
2241         == dr_explicit_realign_optimized)
2242     {
2243       tree vector_size = TYPE_SIZE_UNIT
2244                           (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2245
2246       segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2247     }
2248   return segment_length;
2249 }
2250
2251
2252 /* Function vect_create_cond_for_alias_checks.
2253
2254    Create a conditional expression that represents the run-time checks for
2255    overlapping of address ranges represented by a list of data references
2256    relations passed as input.
2257
2258    Input:
2259    COND_EXPR  - input conditional expression.  New conditions will be chained
2260                 with logical AND operation.
2261    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2262                 to be checked.
2263
2264    Output:
2265    COND_EXPR - conditional expression.
2266
2267    The returned value is the conditional expression to be used in the if
2268    statement that controls which version of the loop gets executed at runtime.
2269 */
2270
2271 static void
2272 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2273 {
2274   vec<ddr_p>  may_alias_ddrs =
2275     LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2276   int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2277   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2278
2279   ddr_p ddr;
2280   unsigned int i;
2281   tree part_cond_expr, length_factor;
2282
2283   /* Create expression
2284      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2285      || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2286      &&
2287      ...
2288      &&
2289      ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2290      || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */
2291
2292   if (may_alias_ddrs.is_empty ())
2293     return;
2294
2295   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2296     {
2297       struct data_reference *dr_a, *dr_b;
2298       gimple dr_group_first_a, dr_group_first_b;
2299       tree addr_base_a, addr_base_b;
2300       tree segment_length_a, segment_length_b;
2301       gimple stmt_a, stmt_b;
2302       tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2303
2304       dr_a = DDR_A (ddr);
2305       stmt_a = DR_STMT (DDR_A (ddr));
2306       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2307       if (dr_group_first_a)
2308         {
2309           stmt_a = dr_group_first_a;
2310           dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2311         }
2312
2313       dr_b = DDR_B (ddr);
2314       stmt_b = DR_STMT (DDR_B (ddr));
2315       dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2316       if (dr_group_first_b)
2317         {
2318           stmt_b = dr_group_first_b;
2319           dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2320         }
2321
2322       addr_base_a
2323         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a),
2324                                    size_binop (PLUS_EXPR, DR_OFFSET (dr_a),
2325                                                DR_INIT (dr_a)));
2326       addr_base_b
2327         = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b),
2328                                    size_binop (PLUS_EXPR, DR_OFFSET (dr_b),
2329                                                DR_INIT (dr_b)));
2330
2331       if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2332         length_factor = scalar_loop_iters;
2333       else
2334         length_factor = size_int (vect_factor);
2335       segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2336       segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2337
2338       if (dump_enabled_p ())
2339         {
2340           dump_printf_loc (MSG_NOTE, vect_location,
2341                            "create runtime check for data references ");
2342           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
2343           dump_printf (MSG_NOTE, " and ");
2344           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
2345           dump_printf (MSG_NOTE, "\n");
2346         }
2347
2348       seg_a_min = addr_base_a;
2349       seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2350       if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2351         seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2352
2353       seg_b_min = addr_base_b;
2354       seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2355       if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2356         seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2357
2358       part_cond_expr =
2359         fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2360           fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2361           fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2362
2363       if (*cond_expr)
2364         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2365                                   *cond_expr, part_cond_expr);
2366       else
2367         *cond_expr = part_cond_expr;
2368     }
2369
2370   if (dump_enabled_p ())
2371     dump_printf_loc (MSG_NOTE, vect_location,
2372                      "created %u versioning for alias checks.\n",
2373                      may_alias_ddrs.length ());
2374 }
2375
2376
2377 /* Function vect_loop_versioning.
2378
2379    If the loop has data references that may or may not be aligned or/and
2380    has data reference relations whose independence was not proven then
2381    two versions of the loop need to be generated, one which is vectorized
2382    and one which isn't.  A test is then generated to control which of the
2383    loops is executed.  The test checks for the alignment of all of the
2384    data references that may or may not be aligned.  An additional
2385    sequence of runtime tests is generated for each pairs of DDRs whose
2386    independence was not proven.  The vectorized version of loop is
2387    executed only if both alias and alignment tests are passed.
2388
2389    The test generated to check which version of loop is executed
2390    is modified to also check for profitability as indicated by the
2391    cost model initially.
2392
2393    The versioning precondition(s) are placed in *COND_EXPR and
2394    *COND_EXPR_STMT_LIST.  */
2395
2396 void
2397 vect_loop_versioning (loop_vec_info loop_vinfo,
2398                       unsigned int th, bool check_profitability)
2399 {
2400   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2401   basic_block condition_bb;
2402   gimple_stmt_iterator gsi, cond_exp_gsi;
2403   basic_block merge_bb;
2404   basic_block new_exit_bb;
2405   edge new_exit_e, e;
2406   gimple orig_phi, new_phi;
2407   tree cond_expr = NULL_TREE;
2408   gimple_seq cond_expr_stmt_list = NULL;
2409   tree arg;
2410   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2411   gimple_seq gimplify_stmt_list = NULL;
2412   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2413   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2414   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2415
2416   if (check_profitability)
2417     {
2418       cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2419                                build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2420       cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2421                                           is_gimple_condexpr, NULL_TREE);
2422     }
2423
2424   if (version_align)
2425     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2426                                        &cond_expr_stmt_list);
2427
2428   if (version_alias)
2429     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2430
2431   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2432                                       is_gimple_condexpr, NULL_TREE);
2433   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2434
2435   initialize_original_copy_tables ();
2436   loop_version (loop, cond_expr, &condition_bb,
2437                 prob, prob, REG_BR_PROB_BASE - prob, true);
2438
2439   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
2440       && dump_enabled_p ())
2441     {
2442       if (version_alias)
2443         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2444                          "loop versioned for vectorization because of "
2445                          "possible aliasing\n");
2446       if (version_align)
2447         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2448                          "loop versioned for vectorization to enhance "
2449                          "alignment\n");
2450
2451     }
2452   free_original_copy_tables();
2453
2454   /* Loop versioning violates an assumption we try to maintain during
2455      vectorization - that the loop exit block has a single predecessor.
2456      After versioning, the exit block of both loop versions is the same
2457      basic block (i.e. it has two predecessors). Just in order to simplify
2458      following transformations in the vectorizer, we fix this situation
2459      here by adding a new (empty) block on the exit-edge of the loop,
2460      with the proper loop-exit phis to maintain loop-closed-form.  */
2461
2462   merge_bb = single_exit (loop)->dest;
2463   gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2464   new_exit_bb = split_edge (single_exit (loop));
2465   new_exit_e = single_exit (loop);
2466   e = EDGE_SUCC (new_exit_bb, 0);
2467
2468   for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2469     {
2470       tree new_res;
2471       orig_phi = gsi_stmt (gsi);
2472       new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2473       new_phi = create_phi_node (new_res, new_exit_bb);
2474       arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2475       add_phi_arg (new_phi, arg, new_exit_e,
2476                    gimple_phi_arg_location_from_edge (orig_phi, e));
2477       adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2478     }
2479
2480   /* End loop-exit-fixes after versioning.  */
2481
2482   if (cond_expr_stmt_list)
2483     {
2484       cond_exp_gsi = gsi_last_bb (condition_bb);
2485       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2486                              GSI_SAME_STMT);
2487     }
2488   update_ssa (TODO_update_ssa);
2489 }