2013-02-12 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blob8a8982ad2e0dfba33ded0374c7e1d26400aa25a6
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-pass.h"
33 #include "cfgloop.h"
34 #include "diagnostic-core.h"
35 #include "tree-scalar-evolution.h"
36 #include "tree-vectorizer.h"
37 #include "langhooks.h"
39 /*************************************************************************
40 Simple Loop Peeling Utilities
42 Utilities to support loop peeling for vectorization purposes.
43 *************************************************************************/
46 /* Renames the use *OP_P. */
48 static void
49 rename_use_op (use_operand_p op_p)
51 tree new_name;
53 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
54 return;
56 new_name = get_current_def (USE_FROM_PTR (op_p));
58 /* Something defined outside of the loop. */
59 if (!new_name)
60 return;
62 /* An ordinary ssa name defined in the loop. */
64 SET_USE (op_p, new_name);
68 /* Renames the variables in basic block BB. */
70 static void
71 rename_variables_in_bb (basic_block bb)
73 gimple_stmt_iterator gsi;
74 gimple stmt;
75 use_operand_p use_p;
76 ssa_op_iter iter;
77 edge e;
78 edge_iterator ei;
79 struct loop *loop = bb->loop_father;
81 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
83 stmt = gsi_stmt (gsi);
84 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
85 rename_use_op (use_p);
88 FOR_EACH_EDGE (e, ei, bb->preds)
90 if (!flow_bb_inside_loop_p (loop, e->src))
91 continue;
92 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
93 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
98 typedef struct
100 tree from, to;
101 basic_block bb;
102 } adjust_info;
104 /* A stack of values to be adjusted in debug stmts. We have to
105 process them LIFO, so that the closest substitution applies. If we
106 processed them FIFO, without the stack, we might substitute uses
107 with a PHI DEF that would soon become non-dominant, and when we got
108 to the suitable one, it wouldn't have anything to substitute any
109 more. */
110 static vec<adjust_info, va_stack> adjust_vec;
112 /* Adjust any debug stmts that referenced AI->from values to use the
113 loop-closed AI->to, if the references are dominated by AI->bb and
114 not by the definition of AI->from. */
116 static void
117 adjust_debug_stmts_now (adjust_info *ai)
119 basic_block bbphi = ai->bb;
120 tree orig_def = ai->from;
121 tree new_def = ai->to;
122 imm_use_iterator imm_iter;
123 gimple stmt;
124 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
126 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
128 /* Adjust any debug stmts that held onto non-loop-closed
129 references. */
130 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
132 use_operand_p use_p;
133 basic_block bbuse;
135 if (!is_gimple_debug (stmt))
136 continue;
138 gcc_assert (gimple_debug_bind_p (stmt));
140 bbuse = gimple_bb (stmt);
142 if ((bbuse == bbphi
143 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
144 && !(bbuse == bbdef
145 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
147 if (new_def)
148 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
149 SET_USE (use_p, new_def);
150 else
152 gimple_debug_bind_reset_value (stmt);
153 update_stmt (stmt);
159 /* Adjust debug stmts as scheduled before. */
161 static void
162 adjust_vec_debug_stmts (void)
164 if (!MAY_HAVE_DEBUG_STMTS)
165 return;
167 gcc_assert (adjust_vec.exists ());
169 while (!adjust_vec.is_empty ())
171 adjust_debug_stmts_now (&adjust_vec.last ());
172 adjust_vec.pop ();
175 adjust_vec.release ();
178 /* Adjust any debug stmts that referenced FROM values to use the
179 loop-closed TO, if the references are dominated by BB and not by
180 the definition of FROM. If adjust_vec is non-NULL, adjustments
181 will be postponed until adjust_vec_debug_stmts is called. */
183 static void
184 adjust_debug_stmts (tree from, tree to, basic_block bb)
186 adjust_info ai;
188 if (MAY_HAVE_DEBUG_STMTS
189 && TREE_CODE (from) == SSA_NAME
190 && ! virtual_operand_p (from))
192 ai.from = from;
193 ai.to = to;
194 ai.bb = bb;
196 if (adjust_vec.exists ())
197 adjust_vec.safe_push (ai);
198 else
199 adjust_debug_stmts_now (&ai);
203 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
204 to adjust any debug stmts that referenced the old phi arg,
205 presumably non-loop-closed references left over from other
206 transformations. */
208 static void
209 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
211 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
213 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
215 if (MAY_HAVE_DEBUG_STMTS)
216 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
217 gimple_bb (update_phi));
221 /* Update PHI nodes for a guard of the LOOP.
223 Input:
224 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
225 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
226 originates from the guard-bb, skips LOOP and reaches the (unique) exit
227 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
228 We denote this bb NEW_MERGE_BB because before the guard code was added
229 it had a single predecessor (the LOOP header), and now it became a merge
230 point of two paths - the path that ends with the LOOP exit-edge, and
231 the path that ends with GUARD_EDGE.
232 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
233 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
235 ===> The CFG before the guard-code was added:
236 LOOP_header_bb:
237 loop_body
238 if (exit_loop) goto update_bb
239 else goto LOOP_header_bb
240 update_bb:
242 ==> The CFG after the guard-code was added:
243 guard_bb:
244 if (LOOP_guard_condition) goto new_merge_bb
245 else goto LOOP_header_bb
246 LOOP_header_bb:
247 loop_body
248 if (exit_loop_condition) goto new_merge_bb
249 else goto LOOP_header_bb
250 new_merge_bb:
251 goto update_bb
252 update_bb:
254 ==> The CFG after this function:
255 guard_bb:
256 if (LOOP_guard_condition) goto new_merge_bb
257 else goto LOOP_header_bb
258 LOOP_header_bb:
259 loop_body
260 if (exit_loop_condition) goto new_exit_bb
261 else goto LOOP_header_bb
262 new_exit_bb:
263 new_merge_bb:
264 goto update_bb
265 update_bb:
267 This function:
268 1. creates and updates the relevant phi nodes to account for the new
269 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
270 1.1. Create phi nodes at NEW_MERGE_BB.
271 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
272 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
273 2. preserves loop-closed-ssa-form by creating the required phi nodes
274 at the exit of LOOP (i.e, in NEW_EXIT_BB).
276 There are two flavors to this function:
278 slpeel_update_phi_nodes_for_guard1:
279 Here the guard controls whether we enter or skip LOOP, where LOOP is a
280 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
281 for variables that have phis in the loop header.
283 slpeel_update_phi_nodes_for_guard2:
284 Here the guard controls whether we enter or skip LOOP, where LOOP is an
285 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
286 for variables that have phis in the loop exit.
288 I.E., the overall structure is:
290 loop1_preheader_bb:
291 guard1 (goto loop1/merge1_bb)
292 loop1
293 loop1_exit_bb:
294 guard2 (goto merge1_bb/merge2_bb)
295 merge1_bb
296 loop2
297 loop2_exit_bb
298 merge2_bb
299 next_bb
301 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
302 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
303 that have phis in loop1->header).
305 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
306 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
307 that have phis in next_bb). It also adds some of these phis to
308 loop1_exit_bb.
310 slpeel_update_phi_nodes_for_guard1 is always called before
311 slpeel_update_phi_nodes_for_guard2. They are both needed in order
312 to create correct data-flow and loop-closed-ssa-form.
314 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
315 that change between iterations of a loop (and therefore have a phi-node
316 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
317 phis for variables that are used out of the loop (and therefore have
318 loop-closed exit phis). Some variables may be both updated between
319 iterations and used after the loop. This is why in loop1_exit_bb we
320 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
321 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
323 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
324 an original loop. i.e., we have:
326 orig_loop
327 guard_bb (goto LOOP/new_merge)
328 new_loop <-- LOOP
329 new_exit
330 new_merge
331 next_bb
333 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
334 have:
336 new_loop
337 guard_bb (goto LOOP/new_merge)
338 orig_loop <-- LOOP
339 new_exit
340 new_merge
341 next_bb
343 The SSA names defined in the original loop have a current
344 reaching definition that records the corresponding new
345 ssa-name used in the new duplicated loop copy.
348 /* Function slpeel_update_phi_nodes_for_guard1
350 Input:
351 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
352 - DEFS - a bitmap of ssa names to mark new names for which we recorded
353 information.
355 In the context of the overall structure, we have:
357 loop1_preheader_bb:
358 guard1 (goto loop1/merge1_bb)
359 LOOP-> loop1
360 loop1_exit_bb:
361 guard2 (goto merge1_bb/merge2_bb)
362 merge1_bb
363 loop2
364 loop2_exit_bb
365 merge2_bb
366 next_bb
368 For each name updated between loop iterations (i.e - for each name that has
369 an entry (loop-header) phi in LOOP) we create a new phi in:
370 1. merge1_bb (to account for the edge from guard1)
371 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
374 static void
375 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
376 bool is_new_loop, basic_block *new_exit_bb)
378 gimple orig_phi, new_phi;
379 gimple update_phi, update_phi2;
380 tree guard_arg, loop_arg;
381 basic_block new_merge_bb = guard_edge->dest;
382 edge e = EDGE_SUCC (new_merge_bb, 0);
383 basic_block update_bb = e->dest;
384 basic_block orig_bb = loop->header;
385 edge new_exit_e;
386 tree current_new_name;
387 gimple_stmt_iterator gsi_orig, gsi_update;
389 /* Create new bb between loop and new_merge_bb. */
390 *new_exit_bb = split_edge (single_exit (loop));
392 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
394 for (gsi_orig = gsi_start_phis (orig_bb),
395 gsi_update = gsi_start_phis (update_bb);
396 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
397 gsi_next (&gsi_orig), gsi_next (&gsi_update))
399 source_location loop_locus, guard_locus;
400 tree new_res;
401 orig_phi = gsi_stmt (gsi_orig);
402 update_phi = gsi_stmt (gsi_update);
404 /** 1. Handle new-merge-point phis **/
406 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
407 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
408 new_phi = create_phi_node (new_res, new_merge_bb);
410 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
411 of LOOP. Set the two phi args in NEW_PHI for these edges: */
412 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
413 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
414 EDGE_SUCC (loop->latch,
415 0));
416 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
417 guard_locus
418 = gimple_phi_arg_location_from_edge (orig_phi,
419 loop_preheader_edge (loop));
421 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
422 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
424 /* 1.3. Update phi in successor block. */
425 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
426 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
427 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
428 update_phi2 = new_phi;
431 /** 2. Handle loop-closed-ssa-form phis **/
433 if (virtual_operand_p (PHI_RESULT (orig_phi)))
434 continue;
436 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
437 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
438 new_phi = create_phi_node (new_res, *new_exit_bb);
440 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
441 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
443 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
444 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
445 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
446 PHI_RESULT (new_phi));
448 /* 2.4. Record the newly created name with set_current_def.
449 We want to find a name such that
450 name = get_current_def (orig_loop_name)
451 and to set its current definition as follows:
452 set_current_def (name, new_phi_name)
454 If LOOP is a new loop then loop_arg is already the name we're
455 looking for. If LOOP is the original loop, then loop_arg is
456 the orig_loop_name and the relevant name is recorded in its
457 current reaching definition. */
458 if (is_new_loop)
459 current_new_name = loop_arg;
460 else
462 current_new_name = get_current_def (loop_arg);
463 /* current_def is not available only if the variable does not
464 change inside the loop, in which case we also don't care
465 about recording a current_def for it because we won't be
466 trying to create loop-exit-phis for it. */
467 if (!current_new_name)
468 continue;
470 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
472 set_current_def (current_new_name, PHI_RESULT (new_phi));
477 /* Function slpeel_update_phi_nodes_for_guard2
479 Input:
480 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
482 In the context of the overall structure, we have:
484 loop1_preheader_bb:
485 guard1 (goto loop1/merge1_bb)
486 loop1
487 loop1_exit_bb:
488 guard2 (goto merge1_bb/merge2_bb)
489 merge1_bb
490 LOOP-> loop2
491 loop2_exit_bb
492 merge2_bb
493 next_bb
495 For each name used outside the loop (i.e - for each name that has an exit
496 phi in next_bb) we create a new phi in:
497 1. merge2_bb (to account for the edge from guard_bb)
498 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
499 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
500 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
503 static void
504 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
505 bool is_new_loop, basic_block *new_exit_bb)
507 gimple orig_phi, new_phi;
508 gimple update_phi, update_phi2;
509 tree guard_arg, loop_arg;
510 basic_block new_merge_bb = guard_edge->dest;
511 edge e = EDGE_SUCC (new_merge_bb, 0);
512 basic_block update_bb = e->dest;
513 edge new_exit_e;
514 tree orig_def, orig_def_new_name;
515 tree new_name, new_name2;
516 tree arg;
517 gimple_stmt_iterator gsi;
519 /* Create new bb between loop and new_merge_bb. */
520 *new_exit_bb = split_edge (single_exit (loop));
522 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
524 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
526 tree new_res;
527 update_phi = gsi_stmt (gsi);
528 orig_phi = update_phi;
529 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
530 /* This loop-closed-phi actually doesn't represent a use
531 out of the loop - the phi arg is a constant. */
532 if (TREE_CODE (orig_def) != SSA_NAME)
533 continue;
534 orig_def_new_name = get_current_def (orig_def);
535 arg = NULL_TREE;
537 /** 1. Handle new-merge-point phis **/
539 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
540 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
541 new_phi = create_phi_node (new_res, new_merge_bb);
543 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
544 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
545 new_name = orig_def;
546 new_name2 = NULL_TREE;
547 if (orig_def_new_name)
549 new_name = orig_def_new_name;
550 /* Some variables have both loop-entry-phis and loop-exit-phis.
551 Such variables were given yet newer names by phis placed in
552 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
553 new_name2 = get_current_def (get_current_def (orig_name)). */
554 new_name2 = get_current_def (new_name);
557 if (is_new_loop)
559 guard_arg = orig_def;
560 loop_arg = new_name;
562 else
564 guard_arg = new_name;
565 loop_arg = orig_def;
567 if (new_name2)
568 guard_arg = new_name2;
570 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
571 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
573 /* 1.3. Update phi in successor block. */
574 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
575 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
576 update_phi2 = new_phi;
579 /** 2. Handle loop-closed-ssa-form phis **/
581 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
582 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
583 new_phi = create_phi_node (new_res, *new_exit_bb);
585 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
586 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
588 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
589 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
590 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
591 PHI_RESULT (new_phi));
594 /** 3. Handle loop-closed-ssa-form phis for first loop **/
596 /* 3.1. Find the relevant names that need an exit-phi in
597 GUARD_BB, i.e. names for which
598 slpeel_update_phi_nodes_for_guard1 had not already created a
599 phi node. This is the case for names that are used outside
600 the loop (and therefore need an exit phi) but are not updated
601 across loop iterations (and therefore don't have a
602 loop-header-phi).
604 slpeel_update_phi_nodes_for_guard1 is responsible for
605 creating loop-exit phis in GUARD_BB for names that have a
606 loop-header-phi. When such a phi is created we also record
607 the new name in its current definition. If this new name
608 exists, then guard_arg was set to this new name (see 1.2
609 above). Therefore, if guard_arg is not this new name, this
610 is an indication that an exit-phi in GUARD_BB was not yet
611 created, so we take care of it here. */
612 if (guard_arg == new_name2)
613 continue;
614 arg = guard_arg;
616 /* 3.2. Generate new phi node in GUARD_BB: */
617 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
618 new_phi = create_phi_node (new_res, guard_edge->src);
620 /* 3.3. GUARD_BB has one incoming edge: */
621 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
622 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
623 UNKNOWN_LOCATION);
625 /* 3.4. Update phi in successor of GUARD_BB: */
626 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
627 == guard_arg);
628 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
629 PHI_RESULT (new_phi));
634 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
635 that starts at zero, increases by one and its limit is NITERS.
637 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
639 void
640 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
642 tree indx_before_incr, indx_after_incr;
643 gimple cond_stmt;
644 gimple orig_cond;
645 edge exit_edge = single_exit (loop);
646 gimple_stmt_iterator loop_cond_gsi;
647 gimple_stmt_iterator incr_gsi;
648 bool insert_after;
649 tree init = build_int_cst (TREE_TYPE (niters), 0);
650 tree step = build_int_cst (TREE_TYPE (niters), 1);
651 LOC loop_loc;
652 enum tree_code code;
654 orig_cond = get_loop_exit_condition (loop);
655 gcc_assert (orig_cond);
656 loop_cond_gsi = gsi_for_stmt (orig_cond);
658 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
659 create_iv (init, step, NULL_TREE, loop,
660 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
662 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
663 true, NULL_TREE, true,
664 GSI_SAME_STMT);
665 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
666 true, GSI_SAME_STMT);
668 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
669 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
670 NULL_TREE);
672 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
674 /* Remove old loop exit test: */
675 gsi_remove (&loop_cond_gsi, true);
676 free_stmt_vec_info (orig_cond);
678 loop_loc = find_loop_location (loop);
679 if (dump_enabled_p ())
681 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
682 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
683 LOC_LINE (loop_loc));
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
686 loop->nb_iterations = niters;
690 /* Given LOOP this function generates a new copy of it and puts it
691 on E which is either the entry or exit of LOOP. */
693 struct loop *
694 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
696 struct loop *new_loop;
697 basic_block *new_bbs, *bbs;
698 bool at_exit;
699 bool was_imm_dom;
700 basic_block exit_dest;
701 edge exit, new_exit;
703 exit = single_exit (loop);
704 at_exit = (e == exit);
705 if (!at_exit && e != loop_preheader_edge (loop))
706 return NULL;
708 bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
709 get_loop_body_with_size (loop, bbs, loop->num_nodes);
711 /* Check whether duplication is possible. */
712 if (!can_copy_bbs_p (bbs, loop->num_nodes))
714 free (bbs);
715 return NULL;
718 /* Generate new loop structure. */
719 new_loop = duplicate_loop (loop, loop_outer (loop));
720 duplicate_subloops (loop, new_loop);
722 exit_dest = exit->dest;
723 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
724 exit_dest) == loop->header ?
725 true : false);
727 /* Also copy the pre-header, this avoids jumping through hoops to
728 duplicate the loop entry PHI arguments. Create an empty
729 pre-header unconditionally for this. */
730 basic_block preheader = split_edge (loop_preheader_edge (loop));
731 edge entry_e = single_pred_edge (preheader);
732 bbs[loop->num_nodes] = preheader;
733 new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
735 copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
736 &exit, 1, &new_exit, NULL,
737 e->src);
738 basic_block new_preheader = new_bbs[loop->num_nodes];
740 add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
742 if (at_exit) /* Add the loop copy at exit. */
744 redirect_edge_and_branch_force (e, new_preheader);
745 flush_pending_stmts (e);
746 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
747 if (was_imm_dom)
748 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
750 /* And remove the non-necessary forwarder again. Keep the other
751 one so we have a proper pre-header for the loop at the exit edge. */
752 redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
753 delete_basic_block (preheader);
754 set_immediate_dominator (CDI_DOMINATORS, loop->header,
755 loop_preheader_edge (loop)->src);
757 else /* Add the copy at entry. */
759 redirect_edge_and_branch_force (entry_e, new_preheader);
760 flush_pending_stmts (entry_e);
761 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
763 redirect_edge_and_branch_force (new_exit, preheader);
764 flush_pending_stmts (new_exit);
765 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
767 /* And remove the non-necessary forwarder again. Keep the other
768 one so we have a proper pre-header for the loop at the exit edge. */
769 redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
770 delete_basic_block (new_preheader);
771 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
772 loop_preheader_edge (new_loop)->src);
775 for (unsigned i = 0; i < loop->num_nodes+1; i++)
776 rename_variables_in_bb (new_bbs[i]);
778 free (new_bbs);
779 free (bbs);
781 #ifdef ENABLE_CHECKING
782 verify_dominators (CDI_DOMINATORS);
783 #endif
785 return new_loop;
789 /* Given the condition statement COND, put it as the last statement
790 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
791 Assumes that this is the single exit of the guarded loop.
792 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
794 static edge
795 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
796 gimple_seq cond_expr_stmt_list,
797 basic_block exit_bb, basic_block dom_bb,
798 int probability)
800 gimple_stmt_iterator gsi;
801 edge new_e, enter_e;
802 gimple cond_stmt;
803 gimple_seq gimplify_stmt_list = NULL;
805 enter_e = EDGE_SUCC (guard_bb, 0);
806 enter_e->flags &= ~EDGE_FALLTHRU;
807 enter_e->flags |= EDGE_FALSE_VALUE;
808 gsi = gsi_last_bb (guard_bb);
810 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
811 NULL_TREE);
812 if (gimplify_stmt_list)
813 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
814 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
815 if (cond_expr_stmt_list)
816 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
818 gsi = gsi_last_bb (guard_bb);
819 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
821 /* Add new edge to connect guard block to the merge/loop-exit block. */
822 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
824 new_e->count = guard_bb->count;
825 new_e->probability = probability;
826 new_e->count = apply_probability (enter_e->count, probability);
827 enter_e->count -= new_e->count;
828 enter_e->probability = inverse_probability (probability);
829 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
830 return new_e;
834 /* This function verifies that the following restrictions apply to LOOP:
835 (1) it is innermost
836 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
837 (3) it is single entry, single exit
838 (4) its exit condition is the last stmt in the header
839 (5) E is the entry/exit edge of LOOP.
842 bool
843 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
845 edge exit_e = single_exit (loop);
846 edge entry_e = loop_preheader_edge (loop);
847 gimple orig_cond = get_loop_exit_condition (loop);
848 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
850 if (need_ssa_update_p (cfun))
851 return false;
853 if (loop->inner
854 /* All loops have an outer scope; the only case loop->outer is NULL is for
855 the function itself. */
856 || !loop_outer (loop)
857 || loop->num_nodes != 2
858 || !empty_block_p (loop->latch)
859 || !single_exit (loop)
860 /* Verify that new loop exit condition can be trivially modified. */
861 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
862 || (e != exit_e && e != entry_e))
863 return false;
865 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling: FIRST_LOOP's exit block
   must guard SECOND_LOOP, and SECOND_LOOP's preheader must be reachable
   both from that guard and from the block before FIRST_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
	of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
902 /* If the run time cost model check determines that vectorization is
903 not profitable and hence scalar loop should be generated then set
904 FIRST_NITERS to prologue peeled iterations. This will allow all the
905 iterations to be executed in the prologue peeled scalar loop. */
907 static void
908 set_prologue_iterations (basic_block bb_before_first_loop,
909 tree *first_niters,
910 struct loop *loop,
911 unsigned int th,
912 int probability)
914 edge e;
915 basic_block cond_bb, then_bb;
916 tree var, prologue_after_cost_adjust_name;
917 gimple_stmt_iterator gsi;
918 gimple newphi;
919 edge e_true, e_false, e_fallthru;
920 gimple cond_stmt;
921 gimple_seq stmts = NULL;
922 tree cost_pre_condition = NULL_TREE;
923 tree scalar_loop_iters =
924 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
926 e = single_pred_edge (bb_before_first_loop);
927 cond_bb = split_edge(e);
929 e = single_pred_edge (bb_before_first_loop);
930 then_bb = split_edge(e);
931 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
933 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
934 EDGE_FALSE_VALUE);
935 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
937 e_true = EDGE_PRED (then_bb, 0);
938 e_true->flags &= ~EDGE_FALLTHRU;
939 e_true->flags |= EDGE_TRUE_VALUE;
941 e_true->probability = probability;
942 e_false->probability = inverse_probability (probability);
943 e_true->count = apply_probability (cond_bb->count, probability);
944 e_false->count = cond_bb->count - e_true->count;
945 then_bb->frequency = EDGE_FREQUENCY (e_true);
946 then_bb->count = e_true->count;
948 e_fallthru = EDGE_SUCC (then_bb, 0);
949 e_fallthru->count = then_bb->count;
951 gsi = gsi_last_bb (cond_bb);
952 cost_pre_condition =
953 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
954 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
955 cost_pre_condition =
956 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
957 NULL_TREE, false, GSI_CONTINUE_LINKING);
958 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
959 NULL_TREE, NULL_TREE);
960 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
962 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
963 "prologue_after_cost_adjust");
964 prologue_after_cost_adjust_name =
965 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
967 gsi = gsi_last_bb (then_bb);
968 if (stmts)
969 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
971 newphi = create_phi_node (var, bb_before_first_loop);
972 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
973 UNKNOWN_LOCATION);
974 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
976 *first_niters = PHI_RESULT (newphi);
979 /* Function slpeel_tree_peel_loop_to_edge.
981 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
982 that is placed on the entry (exit) edge E of LOOP. After this transformation
983 we have two loops one after the other - first-loop iterates FIRST_NITERS
984 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
985 If the cost model indicates that it is profitable to emit a scalar
986 loop instead of the vector one, then the prolog (epilog) loop will iterate
987 for the entire unchanged scalar iterations of the loop.
989 Input:
990 - LOOP: the loop to be peeled.
991 - E: the exit or entry edge of LOOP.
992 If it is the entry edge, we peel the first iterations of LOOP. In this
993 case first-loop is LOOP, and second-loop is the newly created loop.
994 If it is the exit edge, we peel the last iterations of LOOP. In this
995 case, first-loop is the newly created loop, and second-loop is LOOP.
996 - NITERS: the number of iterations that LOOP iterates.
997 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
998 - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
999 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1000 is false, the caller of this function may want to take care of this
1001 (this can be useful if we don't want new stmts added to first-loop).
1002 - TH: cost model profitability threshold of iterations for vectorization.
1003 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1004 during versioning and hence needs to occur during
1005 prologue generation or whether cost model check
1006 has not occurred during prologue generation and hence
1007 needs to occur during epilogue generation.
1008 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1009 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
1012 Output:
1013 The function returns a pointer to the new loop-copy, or NULL if it failed
1014 to perform the transformation.
1016 The function generates two if-then-else guards: one before the first loop,
1017 and the other before the second loop:
1018 The first guard is:
1019 if (FIRST_NITERS == 0) then skip the first loop,
1020 and go directly to the second loop.
1021 The second guard is:
1022 if (FIRST_NITERS == NITERS) then skip the second loop.
1024 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1025 then the generated condition is combined with COND_EXPR and the
1026 statements in COND_EXPR_STMT_LIST are emitted together with it.
1028 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1029 FORNOW the resulting code will not be in loop-closed-ssa form.
1032 static struct loop*
1033 slpeel_tree_peel_loop_to_edge (struct loop *loop,
1034 edge e, tree *first_niters,
1035 tree niters, bool update_first_loop_count,
1036 unsigned int th, bool check_profitability,
1037 tree cond_expr, gimple_seq cond_expr_stmt_list,
1038 int bound1, int bound2)
1040 struct loop *new_loop = NULL, *first_loop, *second_loop;
1041 edge skip_e;
1042 tree pre_condition = NULL_TREE;
1043 basic_block bb_before_second_loop, bb_after_second_loop;
1044 basic_block bb_before_first_loop;
1045 basic_block bb_between_loops;
1046 basic_block new_exit_bb;
1047 gimple_stmt_iterator gsi;
1048 edge exit_e = single_exit (loop);
1049 LOC loop_loc;
1050 tree cost_pre_condition = NULL_TREE;
1051 /* There are many aspects to how likely the first loop is going to be executed.
1052 Without histogram we can't really do good job. Simply set it to
1053 2/3, so the first loop is not reordered to the end of function and
1054 the hot path through stays short. */
1055 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1056 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1057 int probability_of_second_loop;
/* Peeling is only supported for "simple" loops; bail out early otherwise
   (see slpeel_can_duplicate_loop_p and the FORNOW note above).  */
1059 if (!slpeel_can_duplicate_loop_p (loop, e))
1060 return NULL;
1062 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1063 in the exit bb and rename all the uses after the loop. This simplifies
1064 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1065 (but normally loop closed SSA form doesn't require virtual PHIs to be
1066 in the same form). Doing this early simplifies the checking what
1067 uses should be renamed. */
1068 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1069 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1071 gimple phi = gsi_stmt (gsi);
1072 for (gsi = gsi_start_phis (exit_e->dest);
1073 !gsi_end_p (gsi); gsi_next (&gsi))
1074 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
1075 break;
1076 if (gsi_end_p (gsi))
1078 tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1079 gimple new_phi = create_phi_node (new_vop, exit_e->dest);
1080 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1081 imm_use_iterator imm_iter;
1082 gimple stmt;
1083 use_operand_p use_p;
1085 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1086 gimple_phi_set_result (new_phi, new_vop);
/* Redirect all uses of VOP that are outside the loop (and not the new
   exit PHI itself) to the new loop-closed SSA name.  */
1087 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1088 if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1089 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1090 SET_USE (use_p, new_vop);
1092 break;
1095 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1096 Resulting CFG would be:
1098 first_loop:
1099 do {
1100 } while ...
1102 second_loop:
1103 do {
1104 } while ...
1106 orig_exit_bb:
1109 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1111 loop_loc = find_loop_location (loop);
1112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1113 "tree_duplicate_loop_to_edge_cfg failed.\n");
1114 return NULL;
/* Debug-stmt adjustment records are collected into ADJUST_VEC while
   peeling and applied by adjust_vec_debug_stmts at the end.  */
1117 if (MAY_HAVE_DEBUG_STMTS)
1119 gcc_assert (!adjust_vec.exists ());
1120 vec_stack_alloc (adjust_info, adjust_vec, 32);
1123 if (e == exit_e)
1125 /* NEW_LOOP was placed after LOOP. */
1126 first_loop = loop;
1127 second_loop = new_loop;
1129 else
1131 /* NEW_LOOP was placed before LOOP. */
1132 first_loop = new_loop;
1133 second_loop = loop;
1136 /* 2. Add the guard code in one of the following ways:
1138 2.a Add the guard that controls whether the first loop is executed.
1139 This occurs when this function is invoked for prologue or epilogue
1140 generation and when the cost model check can be done at compile time.
1142 Resulting CFG would be:
1144 bb_before_first_loop:
1145 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1146 GOTO first-loop
1148 first_loop:
1149 do {
1150 } while ...
1152 bb_before_second_loop:
1154 second_loop:
1155 do {
1156 } while ...
1158 orig_exit_bb:
1160 2.b Add the cost model check that allows the prologue
1161 to iterate for the entire unchanged scalar
1162 iterations of the loop in the event that the cost
1163 model indicates that the scalar loop is more
1164 profitable than the vector one. This occurs when
1165 this function is invoked for prologue generation
1166 and the cost model check needs to be done at run
1167 time.
1169 Resulting CFG after prologue peeling would be:
1171 if (scalar_loop_iterations <= th)
1172 FIRST_NITERS = scalar_loop_iterations
1174 bb_before_first_loop:
1175 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1176 GOTO first-loop
1178 first_loop:
1179 do {
1180 } while ...
1182 bb_before_second_loop:
1184 second_loop:
1185 do {
1186 } while ...
1188 orig_exit_bb:
1190 2.c Add the cost model check that allows the epilogue
1191 to iterate for the entire unchanged scalar
1192 iterations of the loop in the event that the cost
1193 model indicates that the scalar loop is more
1194 profitable than the vector one. This occurs when
1195 this function is invoked for epilogue generation
1196 and the cost model check needs to be done at run
1197 time. This check is combined with any pre-existing
1198 check in COND_EXPR to avoid versioning.
1200 Resulting CFG after prologue peeling would be:
1202 bb_before_first_loop:
1203 if ((scalar_loop_iterations <= th)
1205 FIRST_NITERS == 0) GOTO bb_before_second_loop
1206 GOTO first-loop
1208 first_loop:
1209 do {
1210 } while ...
1212 bb_before_second_loop:
1214 second_loop:
1215 do {
1216 } while ...
1218 orig_exit_bb:
1221 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1222 /* Loop copying inserted a forwarder block for us here. */
1223 bb_before_second_loop = single_exit (first_loop)->dest;
1225 probability_of_second_loop = (inverse_probability (first_guard_probability)
1226 + combine_probabilities (second_guard_probability,
1227 first_guard_probability));
1228 /* Theoretically preheader edge of first loop and exit edge should have
1229 same frequencies. Loop exit probabilities are however easy to get wrong.
1230 It is safer to copy value from original loop entry. */
1231 bb_before_second_loop->frequency
1232 = apply_probability (bb_before_first_loop->frequency,
1233 probability_of_second_loop);
1234 bb_before_second_loop->count
1235 = apply_probability (bb_before_first_loop->count,
1236 probability_of_second_loop);
1237 single_succ_edge (bb_before_second_loop)->count
1238 = bb_before_second_loop->count;
1240 /* Epilogue peeling. */
1241 if (!update_first_loop_count)
1243 pre_condition =
1244 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1245 build_int_cst (TREE_TYPE (*first_niters), 0));
1246 if (check_profitability)
1248 tree scalar_loop_iters
1249 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1250 (loop_vec_info_for_loop (loop)));
1251 cost_pre_condition =
1252 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1253 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1255 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1256 cost_pre_condition, pre_condition);
1258 if (cond_expr)
1260 pre_condition =
1261 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1262 pre_condition,
1263 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1264 cond_expr));
1268 /* Prologue peeling. */
1269 else
1271 if (check_profitability)
1272 set_prologue_iterations (bb_before_first_loop, first_niters,
1273 loop, th, first_guard_probability);
1275 pre_condition =
1276 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1277 build_int_cst (TREE_TYPE (*first_niters), 0));
1280 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1281 cond_expr_stmt_list,
1282 bb_before_second_loop, bb_before_first_loop,
1283 inverse_probability (first_guard_probability));
1284 scale_loop_profile (first_loop, first_guard_probability,
1285 check_profitability && (int)th > bound1 ? th : bound1);
1286 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1287 first_loop == new_loop,
1288 &new_exit_bb);
1291 /* 3. Add the guard that controls whether the second loop is executed.
1292 Resulting CFG would be:
1294 bb_before_first_loop:
1295 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1296 GOTO first-loop
1298 first_loop:
1299 do {
1300 } while ...
1302 bb_between_loops:
1303 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1304 GOTO bb_before_second_loop
1306 bb_before_second_loop:
1308 second_loop:
1309 do {
1310 } while ...
1312 bb_after_second_loop:
1314 orig_exit_bb:
1317 bb_between_loops = new_exit_bb;
1318 bb_after_second_loop = split_edge (single_exit (second_loop));
1320 pre_condition =
1321 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1322 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1323 bb_after_second_loop, bb_before_first_loop,
1324 inverse_probability (second_guard_probability));
1325 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
1326 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1327 second_loop == new_loop, &new_exit_bb);
1329 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1331 if (update_first_loop_count)
1332 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1334 delete_update_ssa ();
/* Apply the debug-stmt adjustments collected above (no-op when no
   debug stmts were recorded).  */
1336 adjust_vec_debug_stmts ();
1338 return new_loop;
1341 /* Function find_loop_location.
1343 Extract the location of the loop in the source code.
1344 If the loop is not well formed for vectorization, an estimated
1345 location is calculated.
1346 Return the loop location if it succeeds and UNKNOWN_LOC if not. */
1349 find_loop_location (struct loop *loop)
1351 gimple stmt = NULL;
1352 basic_block bb;
1353 gimple_stmt_iterator si;
1355 if (!loop)
1356 return UNKNOWN_LOC;
1358 stmt = get_loop_exit_condition (loop);
1360 if (stmt
1361 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1362 return gimple_location (stmt);
1364 /* If we got here the loop is probably not "well formed",
1365 try to estimate the loop location */
1367 if (!loop->header)
1368 return UNKNOWN_LOC;
1370 bb = loop->header;
1372 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1374 stmt = gsi_stmt (si);
1375 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
1376 return gimple_location (stmt);
1379 return UNKNOWN_LOC;
1383 /* This function builds ni_name = number of iterations loop executes
1384 on the loop preheader. If SEQ is given the stmt is instead emitted
1385 there. */
1387 static tree
1388 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1390 tree ni_name, var;
1391 gimple_seq stmts = NULL;
1392 edge pe;
1393 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1394 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1396 var = create_tmp_var (TREE_TYPE (ni), "niters");
1397 ni_name = force_gimple_operand (ni, &stmts, false, var);
1399 pe = loop_preheader_edge (loop);
1400 if (stmts)
1402 if (seq)
1403 gimple_seq_add_seq (&seq, stmts);
1404 else
1406 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1407 gcc_assert (!new_bb);
1411 return ni_name;
1415 /* This function generates the following statements:
1417 ni_name = number of iterations loop executes
1418 ratio = ni_name / vf
1419 ratio_mult_vf_name = ratio * vf
1421 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1422 if that is non-NULL. */
1424 static void
1425 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1426 tree *ni_name_ptr,
1427 tree *ratio_mult_vf_name_ptr,
1428 tree *ratio_name_ptr,
1429 gimple_seq cond_expr_stmt_list)
1432 edge pe;
1433 basic_block new_bb;
1434 gimple_seq stmts;
1435 tree ni_name, ni_minus_gap_name;
1436 tree var;
1437 tree ratio_name;
1438 tree ratio_mult_vf_name;
1439 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1440 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1441 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1442 tree log_vf;
1444 pe = loop_preheader_edge (loop);
1446 /* Generate temporary variable that contains
1447 number of iterations loop executes. */
1449 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1450 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1452 /* If epilogue loop is required because of data accesses with gaps, we
1453 subtract one iteration from the total number of iterations here for
1454 correct calculation of RATIO. */
1455 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1457 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1458 ni_name,
1459 build_one_cst (TREE_TYPE (ni_name)));
1460 if (!is_gimple_val (ni_minus_gap_name))
1462 var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1464 stmts = NULL;
1465 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1466 true, var);
1467 if (cond_expr_stmt_list)
1468 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1469 else
1471 pe = loop_preheader_edge (loop);
1472 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1473 gcc_assert (!new_bb);
1477 else
1478 ni_minus_gap_name = ni_name;
1480 /* Create: ratio = ni >> log2(vf) */
1482 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1483 ni_minus_gap_name, log_vf);
1484 if (!is_gimple_val (ratio_name))
1486 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1488 stmts = NULL;
1489 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1490 if (cond_expr_stmt_list)
1491 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1492 else
1494 pe = loop_preheader_edge (loop);
1495 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1496 gcc_assert (!new_bb);
1500 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1502 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1503 ratio_name, log_vf);
1504 if (!is_gimple_val (ratio_mult_vf_name))
1506 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1508 stmts = NULL;
1509 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1510 true, var);
1511 if (cond_expr_stmt_list)
1512 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1513 else
1515 pe = loop_preheader_edge (loop);
1516 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1517 gcc_assert (!new_bb);
1521 *ni_name_ptr = ni_name;
1522 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1523 *ratio_name_ptr = ratio_name;
1525 return;
1528 /* Function vect_can_advance_ivs_p
1530 In case the number of iterations that LOOP iterates is unknown at compile
1531 time, an epilog loop will be generated, and the loop induction variables
1532 (IVs) will be "advanced" to the value they are supposed to take just before
1533 the epilog loop. Here we check that the access function of the loop IVs
1534 and the expression that represents the loop bound are simple enough.
1535 These restrictions will be relaxed in the future. */
1537 bool
1538 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1541 basic_block bb = loop->header;
1542 gimple phi;
1543 gimple_stmt_iterator gsi;
1545 /* Analyze phi functions of the loop header. */
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
1549 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1551 tree access_fn = NULL;
1552 tree evolution_part;
1554 phi = gsi_stmt (gsi);
1555 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1558 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1561 /* Skip virtual phi's. The data dependences that are associated with
1562 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1564 if (virtual_operand_p (PHI_RESULT (phi)))
1566 if (dump_enabled_p ())
1567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1568 "virtual phi. skip.");
1569 continue;
1572 /* Skip reduction phis. */
1574 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1576 if (dump_enabled_p ())
1577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1578 "reduc phi. skip.");
1579 continue;
1582 /* Analyze the evolution function. */
1584 access_fn = instantiate_parameters
1585 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1587 if (!access_fn)
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1591 "No Access function.");
1592 return false;
1595 STRIP_NOPS (access_fn);
1596 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location,
1599 "Access function of PHI: ");
1600 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
1603 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1605 if (evolution_part == NULL_TREE)
1607 if (dump_enabled_p ())
1608 dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
1609 return false;
1612 /* FORNOW: We do not transform initial conditions of IVs
1613 which evolution functions are a polynomial of degree >= 2. */
1615 if (tree_is_chrec (evolution_part))
1616 return false;
1619 return true;
1623 /* Function vect_update_ivs_after_vectorizer.
1625 "Advance" the induction variables of LOOP to the value they should take
1626 after the execution of LOOP. This is currently necessary because the
1627 vectorizer does not handle induction variables that are used after the
1628 loop. Such a situation occurs when the last iterations of LOOP are
1629 peeled, because:
1630 1. We introduced new uses after LOOP for IVs that were not originally used
1631 after LOOP: the IVs of LOOP are now used by an epilog loop.
1632 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1633 times, whereas the loop IVs should be bumped N times.
1635 Input:
1636 - LOOP - a loop that is going to be vectorized. The last few iterations
1637 of LOOP were peeled.
1638 - NITERS - the number of iterations that LOOP executes (before it is
1639 vectorized). i.e, the number of times the ivs should be bumped.
1640 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1641 coming out from LOOP on which there are uses of the LOOP ivs
1642 (this is the path from LOOP->exit to epilog_loop->preheader).
1644 The new definitions of the ivs are placed in LOOP->exit.
1645 The phi args associated with the edge UPDATE_E in the bb
1646 UPDATE_E->dest are updated accordingly.
1648 Assumption 1: Like the rest of the vectorizer, this function assumes
1649 a single loop exit that has a single predecessor.
1651 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1652 organized in the same order.
1654 Assumption 3: The access function of the ivs is simple enough (see
1655 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1657 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1658 coming out of LOOP on which the ivs of LOOP are used (this is the path
1659 that leads to the epilog loop; other paths skip the epilog loop). This
1660 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1661 needs to have its phis updated.
1664 static void
1665 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1666 edge update_e)
1668 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1669 basic_block exit_bb = single_exit (loop)->dest;
1670 gimple phi, phi1;
1671 gimple_stmt_iterator gsi, gsi1;
1672 basic_block update_bb = update_e->dest;
1674 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1676 /* Make sure there exists a single-predecessor exit bb: */
1677 gcc_assert (single_pred_p (exit_bb));
/* Walk the header phis and the UPDATE_BB phis in lockstep; per
   Assumption 2 in the function comment they are organized in the same
   order, so GSI and GSI1 always refer to corresponding phis.  */
1679 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1680 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1681 gsi_next (&gsi), gsi_next (&gsi1))
1683 tree init_expr;
1684 tree step_expr, off;
1685 tree type;
1686 tree var, ni, ni_name;
1687 gimple_stmt_iterator last_gsi;
1688 stmt_vec_info stmt_info;
1690 phi = gsi_stmt (gsi);
1691 phi1 = gsi_stmt (gsi1);
1692 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_NOTE, vect_location,
1695 "vect_update_ivs_after_vectorizer: phi: ");
1696 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1699 /* Skip virtual phi's. */
1700 if (virtual_operand_p (PHI_RESULT (phi)))
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704 "virtual phi. skip.");
1705 continue;
1708 /* Skip reduction phis. */
1709 stmt_info = vinfo_for_stmt (phi);
1710 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1712 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1714 "reduc phi. skip.");
1715 continue;
1718 type = TREE_TYPE (gimple_phi_result (phi));
1719 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1720 step_expr = unshare_expr (step_expr);
1722 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1723 of degree >= 2 or exponential. */
1724 gcc_assert (!tree_is_chrec (step_expr));
1726 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* Compute ni = init + niters * step: the value the IV takes after
   NITERS iterations.  Pointer IVs use pointer-plus arithmetic.  */
1728 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1729 fold_convert (TREE_TYPE (step_expr), niters),
1730 step_expr);
1731 if (POINTER_TYPE_P (type))
1732 ni = fold_build_pointer_plus (init_expr, off);
1733 else
1734 ni = fold_build2 (PLUS_EXPR, type,
1735 init_expr, fold_convert (type, off));
1737 var = create_tmp_var (type, "tmp");
/* Materialize the new IV value at the end of the exit block, as the
   function comment promises ("placed in LOOP->exit").  */
1739 last_gsi = gsi_last_bb (exit_bb);
1740 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1741 true, GSI_SAME_STMT);
1743 /* Fix phi expressions in the successor bb. */
1744 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1748 /* Function vect_do_peeling_for_loop_bound
1750 Peel the last iterations of the loop represented by LOOP_VINFO.
1751 The peeled iterations form a new epilog loop. Given that the loop now
1752 iterates NITERS times, the new epilog loop iterates
1753 NITERS % VECTORIZATION_FACTOR times.
1755 The original loop will later be made to iterate
1756 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1758 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1759 test. */
1761 void
1762 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1763 unsigned int th, bool check_profitability)
1765 tree ni_name, ratio_mult_vf_name;
1766 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1767 struct loop *new_loop;
1768 edge update_e;
1769 basic_block preheader;
1770 int loop_num;
1771 int max_iter;
1772 tree cond_expr = NULL_TREE;
1773 gimple_seq cond_expr_stmt_list = NULL;
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1777 "=== vect_do_peeling_for_loop_bound ===");
1779 initialize_original_copy_tables ();
1781 /* Generate the following variables on the preheader of original loop:
1783 ni_name = number of iteration the original loop executes
1784 ratio = ni_name / vf
1785 ratio_mult_vf_name = ratio * vf */
1786 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1787 &ratio_mult_vf_name, ratio,
1788 cond_expr_stmt_list);
1790 loop_num = loop->num;
1792 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1793 &ratio_mult_vf_name, ni_name, false,
1794 th, check_profitability,
1795 cond_expr, cond_expr_stmt_list,
1796 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1797 gcc_assert (new_loop);
1798 gcc_assert (loop_num == loop->num);
1799 #ifdef ENABLE_CHECKING
1800 slpeel_verify_cfg_after_peeling (loop, new_loop);
1801 #endif
1803 /* A guard that controls whether the new_loop is to be executed or skipped
1804 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1805 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1806 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1807 is on the path where the LOOP IVs are used and need to be updated. */
1809 preheader = loop_preheader_edge (new_loop)->src;
1810 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1811 update_e = EDGE_PRED (preheader, 0);
1812 else
1813 update_e = EDGE_PRED (preheader, 1);
1815 /* Update IVs of original loop as if they were advanced
1816 by ratio_mult_vf_name steps. */
1817 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1819 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1820 and this means N-2 loopback edge executions.
1822 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1823 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1824 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1825 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1826 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1827 if (check_profitability)
1828 max_iter = MAX (max_iter, (int) th - 1);
1829 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1830 dump_printf (MSG_OPTIMIZED_LOCATIONS,
1831 "Setting upper bound of nb iterations for epilogue "
1832 "loop to %d\n", max_iter);
1834 /* After peeling we have to reset scalar evolution analyzer. */
1835 scev_reset ();
1837 free_original_copy_tables ();
1841 /* Function vect_gen_niters_for_prolog_loop
1843 Set the number of iterations for the loop represented by LOOP_VINFO
1844 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1845 and the misalignment of DR - the data reference recorded in
1846 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1847 this loop, the data reference DR will refer to an aligned location.
1849 The following computation is generated:
1851 If the misalignment of DR is known at compile time:
1852 addr_mis = int mis = DR_MISALIGNMENT (dr);
1853 Else, compute address misalignment in bytes:
1854 addr_mis = addr & (vectype_align - 1)
1856 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1858 (elem_size = element type size; an element is the scalar element whose type
1859 is the inner type of the vectype)
1861 When the step of the data-ref in the loop is not 1 (as in interleaved data
1862 and SLP), the number of iterations of the prolog must be divided by the step
1863 (which is equal to the size of interleaved group).
1865 The above formulas assume that VF == number of elements in the vector. This
1866 may not hold when there are multiple-types in the loop.
1867 In this case, for some data-references in the loop the VF does not represent
1868 the number of elements that fit in the vector. Therefore, instead of VF we
1869 use TYPE_VECTOR_SUBPARTS. */
1871 static tree
1872 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
1874 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1875 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1876 tree var;
1877 gimple_seq stmts;
1878 tree iters, iters_name;
1879 edge pe;
1880 basic_block new_bb;
1881 gimple dr_stmt = DR_STMT (dr);
1882 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1884 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1885 tree niters_type = TREE_TYPE (loop_niters);
1886 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1888 pe = loop_preheader_edge (loop);
/* If the number of iterations to peel was determined at compile time,
   just use that constant.  */
1890 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1892 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1894 if (dump_enabled_p ())
1895 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1896 "known peeling = %d.", npeel);
1898 iters = build_int_cst (niters_type, npeel);
1899 *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
/* Otherwise compute the misalignment at run time from the address of DR
   (see the formula in the function comment above).  */
1901 else
1903 gimple_seq new_stmts = NULL;
1904 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1905 tree offset = negative
1906 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
1907 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
1908 &new_stmts, offset, loop);
1909 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1910 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1911 HOST_WIDE_INT elem_size =
1912 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1913 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1914 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1915 tree nelements_tree = build_int_cst (type, nelements);
1916 tree byte_misalign;
1917 tree elem_misalign;
1919 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1920 gcc_assert (!new_bb);
1922 /* Create: byte_misalign = addr & (vectype_align - 1) */
1923 byte_misalign =
1924 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
1925 vectype_align_minus_1);
1927 /* Create: elem_misalign = byte_misalign / element_size */
1928 elem_misalign =
1929 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1931 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1932 if (negative)
1933 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1934 else
1935 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1936 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1937 iters = fold_convert (niters_type, iters);
1938 *bound = nelements;
1941 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1942 /* If the loop bound is known at compile time we already verified that it is
1943 greater than vf; since the misalignment ('iters') is at most vf, there's
1944 no need to generate the MIN_EXPR in this case. */
1945 if (TREE_CODE (loop_niters) != INTEGER_CST)
1946 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1948 if (dump_enabled_p ())
1950 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1951 "niters for prolog loop: ");
1952 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, iters);
1955 var = create_tmp_var (niters_type, "prolog_loop_niters");
1956 stmts = NULL;
1957 iters_name = force_gimple_operand (iters, &stmts, false, var);
1959 /* Insert stmt on loop preheader edge. */
1960 if (stmts)
1962 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1963 gcc_assert (!new_bb);
1966 return iters_name;
1970 /* Function vect_update_init_of_dr
1972 NITERS iterations were peeled from LOOP. DR represents a data reference
1973 in LOOP. This function updates the information recorded in DR to
1974 account for the fact that the first NITERS iterations had already been
1975 executed. Specifically, it updates the OFFSET field of DR. */
1977 static void
1978 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1980 tree offset = DR_OFFSET (dr);
1982 niters = fold_build2 (MULT_EXPR, sizetype,
1983 fold_convert (sizetype, niters),
1984 fold_convert (sizetype, DR_STEP (dr)));
1985 offset = fold_build2 (PLUS_EXPR, sizetype,
1986 fold_convert (sizetype, offset), niters);
1987 DR_OFFSET (dr) = offset;
1991 /* Function vect_update_inits_of_drs
1993 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1994 This function updates the information recorded for the data references in
1995 the loop to account for the fact that the first NITERS iterations had
1996 already been executed. Specifically, it updates the initial_condition of
1997 the access_function of all the data_references in the loop. */
1999 static void
2000 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2002 unsigned int i;
2003 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2004 struct data_reference *dr;
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2008 "=== vect_update_inits_of_dr ===");
2010 FOR_EACH_VEC_ELT (datarefs, i, dr)
2011 vect_update_init_of_dr (dr, niters);
2015 /* Function vect_do_peeling_for_alignment
2017 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2018 'niters' is set to the misalignment of one of the data references in the
2019 loop, thereby forcing it to refer to an aligned location at the beginning
2020 of the execution of this loop. The data reference for which we are
2021 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2023 void
2024 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
2025 unsigned int th, bool check_profitability)
2027 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2028 tree niters_of_prolog_loop, ni_name;
2029 tree n_iters;
2030 tree wide_prolog_niters;
2031 struct loop *new_loop;
2032 int max_iter;
2033 int bound = 0;
2035 if (dump_enabled_p ())
2036 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2037 "=== vect_do_peeling_for_alignment ===");
2039 initialize_original_copy_tables ();
2041 ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2042 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2043 ni_name,
2044 &bound);
2046 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2047 new_loop =
2048 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2049 &niters_of_prolog_loop, ni_name, true,
2050 th, check_profitability, NULL_TREE, NULL,
2051 bound,
2054 gcc_assert (new_loop);
2055 #ifdef ENABLE_CHECKING
2056 slpeel_verify_cfg_after_peeling (new_loop, loop);
2057 #endif
2058 /* For vectorization factor N, we need to copy at most N-1 values
2059 for alignment and this means N-2 loopback edge executions. */
2060 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
2061 if (check_profitability)
2062 max_iter = MAX (max_iter, (int) th - 1);
2063 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
2064 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2065 "Setting upper bound of nb iterations for prologue "
2066 "loop to %d\n", max_iter);
2068 /* Update number of times loop executes. */
2069 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2070 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2071 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2073 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2074 wide_prolog_niters = niters_of_prolog_loop;
2075 else
2077 gimple_seq seq = NULL;
2078 edge pe = loop_preheader_edge (loop);
2079 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2080 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2081 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2082 var);
2083 if (seq)
2085 /* Insert stmt on loop preheader edge. */
2086 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2087 gcc_assert (!new_bb);
2091 /* Update the init conditions of the access functions of all data refs. */
2092 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2094 /* After peeling we have to reset scalar evolution analyzer. */
2095 scev_reset ();
2097 free_original_copy_tables ();
2101 /* Function vect_create_cond_for_align_checks.
2103 Create a conditional expression that represents the alignment checks for
2104 all of data references (array element references) whose alignment must be
2105 checked at runtime.
2107 Input:
2108 COND_EXPR - input conditional expression. New conditions will be chained
2109 with logical AND operation.
2110 LOOP_VINFO - two fields of the loop information are used.
2111 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2112 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2114 Output:
2115 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2116 expression.
2117 The returned value is the conditional expression to be used in the if
2118 statement that controls which version of the loop gets executed at runtime.
2120 The algorithm makes two assumptions:
2121 1) The number of bytes "n" in a vector is a power of 2.
2122 2) An address "a" is aligned if a%n is zero and that this
2123 test can be done as a&(n-1) == 0. For example, for 16
2124 byte vectors the test is a&0xf == 0. */
2126 static void
2127 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2128 tree *cond_expr,
2129 gimple_seq *cond_expr_stmt_list)
2131 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2132 vec<gimple> may_misalign_stmts
2133 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2134 gimple ref_stmt;
2135 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2136 tree mask_cst;
2137 unsigned int i;
2138 tree int_ptrsize_type;
2139 char tmp_name[20];
2140 tree or_tmp_name = NULL_TREE;
2141 tree and_tmp_name;
2142 gimple and_stmt;
2143 tree ptrsize_zero;
2144 tree part_cond_expr;
2146 /* Check that mask is one less than a power of 2, i.e., mask is
2147 all zeros followed by all ones. */
2148 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2150 int_ptrsize_type = signed_type_for (ptr_type_node);
2152 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2153 of the first vector of the i'th data reference. */
2155 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2157 gimple_seq new_stmt_list = NULL;
2158 tree addr_base;
2159 tree addr_tmp_name;
2160 tree new_or_tmp_name;
2161 gimple addr_stmt, or_stmt;
2162 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2163 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2164 bool negative = tree_int_cst_compare
2165 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2166 tree offset = negative
2167 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2169 /* create: addr_tmp = (int)(address_of_first_vector) */
2170 addr_base =
2171 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2172 offset, loop);
2173 if (new_stmt_list != NULL)
2174 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2176 sprintf (tmp_name, "addr2int%d", i);
2177 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2178 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2179 addr_base, NULL_TREE);
2180 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2182 /* The addresses are OR together. */
2184 if (or_tmp_name != NULL_TREE)
2186 /* create: or_tmp = or_tmp | addr_tmp */
2187 sprintf (tmp_name, "orptrs%d", i);
2188 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2189 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2190 new_or_tmp_name,
2191 or_tmp_name, addr_tmp_name);
2192 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2193 or_tmp_name = new_or_tmp_name;
2195 else
2196 or_tmp_name = addr_tmp_name;
2198 } /* end for i */
2200 mask_cst = build_int_cst (int_ptrsize_type, mask);
2202 /* create: and_tmp = or_tmp & mask */
2203 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2205 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2206 or_tmp_name, mask_cst);
2207 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2209 /* Make and_tmp the left operand of the conditional test against zero.
2210 if and_tmp has a nonzero bit then some address is unaligned. */
2211 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2212 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2213 and_tmp_name, ptrsize_zero);
2214 if (*cond_expr)
2215 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2216 *cond_expr, part_cond_expr);
2217 else
2218 *cond_expr = part_cond_expr;
2222 /* Function vect_vfa_segment_size.
2224 Create an expression that computes the size of segment
2225 that will be accessed for a data reference. The functions takes into
2226 account that realignment loads may access one more vector.
2228 Input:
2229 DR: The data reference.
2230 LENGTH_FACTOR: segment length to consider.
2232 Return an expression whose value is the size of segment which will be
2233 accessed by DR. */
2235 static tree
2236 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2238 tree segment_length;
2240 if (integer_zerop (DR_STEP (dr)))
2241 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2242 else
2243 segment_length = size_binop (MULT_EXPR,
2244 fold_convert (sizetype, DR_STEP (dr)),
2245 fold_convert (sizetype, length_factor));
2247 if (vect_supportable_dr_alignment (dr, false)
2248 == dr_explicit_realign_optimized)
2250 tree vector_size = TYPE_SIZE_UNIT
2251 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2253 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2255 return segment_length;
2259 /* Function vect_create_cond_for_alias_checks.
2261 Create a conditional expression that represents the run-time checks for
2262 overlapping of address ranges represented by a list of data references
2263 relations passed as input.
2265 Input:
2266 COND_EXPR - input conditional expression. New conditions will be chained
2267 with logical AND operation.
2268 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2269 to be checked.
2271 Output:
2272 COND_EXPR - conditional expression.
2273 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2274 expression.
2277 The returned value is the conditional expression to be used in the if
2278 statement that controls which version of the loop gets executed at runtime.
2281 static void
2282 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2283 tree * cond_expr,
2284 gimple_seq * cond_expr_stmt_list)
2286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2287 vec<ddr_p> may_alias_ddrs =
2288 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2289 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2290 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2292 ddr_p ddr;
2293 unsigned int i;
2294 tree part_cond_expr, length_factor;
2296 /* Create expression
2297 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2298 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2302 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2303 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
2305 if (may_alias_ddrs.is_empty ())
2306 return;
2308 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
2310 struct data_reference *dr_a, *dr_b;
2311 gimple dr_group_first_a, dr_group_first_b;
2312 tree addr_base_a, addr_base_b;
2313 tree segment_length_a, segment_length_b;
2314 gimple stmt_a, stmt_b;
2315 tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2317 dr_a = DDR_A (ddr);
2318 stmt_a = DR_STMT (DDR_A (ddr));
2319 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2320 if (dr_group_first_a)
2322 stmt_a = dr_group_first_a;
2323 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2326 dr_b = DDR_B (ddr);
2327 stmt_b = DR_STMT (DDR_B (ddr));
2328 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2329 if (dr_group_first_b)
2331 stmt_b = dr_group_first_b;
2332 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2335 addr_base_a =
2336 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2337 NULL_TREE, loop);
2338 addr_base_b =
2339 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2340 NULL_TREE, loop);
2342 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2343 length_factor = scalar_loop_iters;
2344 else
2345 length_factor = size_int (vect_factor);
2346 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2347 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2349 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2352 "create runtime check for data references ");
2353 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_a));
2354 dump_printf (MSG_OPTIMIZED_LOCATIONS, " and ");
2355 dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_b));
2358 seg_a_min = addr_base_a;
2359 seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2360 if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2361 seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2363 seg_b_min = addr_base_b;
2364 seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2365 if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2366 seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2368 part_cond_expr =
2369 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2370 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2371 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2373 if (*cond_expr)
2374 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2375 *cond_expr, part_cond_expr);
2376 else
2377 *cond_expr = part_cond_expr;
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2382 "created %u versioning for alias checks.\n",
2383 may_alias_ddrs.length ());
2387 /* Function vect_loop_versioning.
2389 If the loop has data references that may or may not be aligned or/and
2390 has data reference relations whose independence was not proven then
2391 two versions of the loop need to be generated, one which is vectorized
2392 and one which isn't. A test is then generated to control which of the
2393 loops is executed. The test checks for the alignment of all of the
2394 data references that may or may not be aligned. An additional
2395 sequence of runtime tests is generated for each pairs of DDRs whose
2396 independence was not proven. The vectorized version of loop is
2397 executed only if both alias and alignment tests are passed.
2399 The test generated to check which version of loop is executed
2400 is modified to also check for profitability as indicated by the
2401 cost model initially.
2403 The versioning precondition(s) are placed in *COND_EXPR and
2404 *COND_EXPR_STMT_LIST. */
2406 void
2407 vect_loop_versioning (loop_vec_info loop_vinfo,
2408 unsigned int th, bool check_profitability)
2410 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2411 basic_block condition_bb;
2412 gimple_stmt_iterator gsi, cond_exp_gsi;
2413 basic_block merge_bb;
2414 basic_block new_exit_bb;
2415 edge new_exit_e, e;
2416 gimple orig_phi, new_phi;
2417 tree cond_expr = NULL_TREE;
2418 gimple_seq cond_expr_stmt_list = NULL;
2419 tree arg;
2420 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2421 gimple_seq gimplify_stmt_list = NULL;
2422 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2424 if (check_profitability)
2426 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2427 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2428 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2429 is_gimple_condexpr, NULL_TREE);
2432 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2433 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2434 &cond_expr_stmt_list);
2436 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2437 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
2438 &cond_expr_stmt_list);
2440 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2441 is_gimple_condexpr, NULL_TREE);
2442 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2444 initialize_original_copy_tables ();
2445 loop_version (loop, cond_expr, &condition_bb,
2446 prob, prob, REG_BR_PROB_BASE - prob, true);
2447 free_original_copy_tables();
2449 /* Loop versioning violates an assumption we try to maintain during
2450 vectorization - that the loop exit block has a single predecessor.
2451 After versioning, the exit block of both loop versions is the same
2452 basic block (i.e. it has two predecessors). Just in order to simplify
2453 following transformations in the vectorizer, we fix this situation
2454 here by adding a new (empty) block on the exit-edge of the loop,
2455 with the proper loop-exit phis to maintain loop-closed-form. */
2457 merge_bb = single_exit (loop)->dest;
2458 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2459 new_exit_bb = split_edge (single_exit (loop));
2460 new_exit_e = single_exit (loop);
2461 e = EDGE_SUCC (new_exit_bb, 0);
2463 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2465 tree new_res;
2466 orig_phi = gsi_stmt (gsi);
2467 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2468 new_phi = create_phi_node (new_res, new_exit_bb);
2469 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2470 add_phi_arg (new_phi, arg, new_exit_e,
2471 gimple_phi_arg_location_from_edge (orig_phi, e));
2472 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2475 /* End loop-exit-fixes after versioning. */
2477 update_ssa (TODO_update_ssa);
2478 if (cond_expr_stmt_list)
2480 cond_exp_gsi = gsi_last_bb (condition_bb);
2481 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2482 GSI_SAME_STMT);