2013-01-12 Janus Weil <janus@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vect-loop-manip.c
blob6b6e130159e77dfcde6f8180974a176c843a2e36
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-pass.h"
33 #include "cfgloop.h"
34 #include "diagnostic-core.h"
35 #include "tree-scalar-evolution.h"
36 #include "tree-vectorizer.h"
37 #include "langhooks.h"
39 /*************************************************************************
40 Simple Loop Peeling Utilities
42 Utilities to support loop peeling for vectorization purposes.
43 *************************************************************************/
46 /* Renames the use *OP_P. */
48 static void
49 rename_use_op (use_operand_p op_p)
51 tree new_name;
53 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
54 return;
56 new_name = get_current_def (USE_FROM_PTR (op_p));
58 /* Something defined outside of the loop. */
59 if (!new_name)
60 return;
62 /* An ordinary ssa name defined in the loop. */
64 SET_USE (op_p, new_name);
68 /* Renames the variables in basic block BB. */
70 void
71 rename_variables_in_bb (basic_block bb)
73 gimple_stmt_iterator gsi;
74 gimple stmt;
75 use_operand_p use_p;
76 ssa_op_iter iter;
77 edge e;
78 edge_iterator ei;
79 struct loop *loop = bb->loop_father;
81 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
83 stmt = gsi_stmt (gsi);
84 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
85 rename_use_op (use_p);
88 FOR_EACH_EDGE (e, ei, bb->succs)
90 if (!flow_bb_inside_loop_p (loop, e->dest))
91 continue;
92 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
93 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
98 /* Renames variables in new generated LOOP. */
100 void
101 rename_variables_in_loop (struct loop *loop)
103 unsigned i;
104 basic_block *bbs;
106 bbs = get_loop_body (loop);
108 for (i = 0; i < loop->num_nodes; i++)
109 rename_variables_in_bb (bbs[i]);
111 free (bbs);
114 typedef struct
116 tree from, to;
117 basic_block bb;
118 } adjust_info;
120 /* A stack of values to be adjusted in debug stmts. We have to
121 process them LIFO, so that the closest substitution applies. If we
122 processed them FIFO, without the stack, we might substitute uses
123 with a PHI DEF that would soon become non-dominant, and when we got
124 to the suitable one, it wouldn't have anything to substitute any
125 more. */
126 static vec<adjust_info, va_stack> adjust_vec;
128 /* Adjust any debug stmts that referenced AI->from values to use the
129 loop-closed AI->to, if the references are dominated by AI->bb and
130 not by the definition of AI->from. */
132 static void
133 adjust_debug_stmts_now (adjust_info *ai)
135 basic_block bbphi = ai->bb;
136 tree orig_def = ai->from;
137 tree new_def = ai->to;
138 imm_use_iterator imm_iter;
139 gimple stmt;
140 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
142 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
144 /* Adjust any debug stmts that held onto non-loop-closed
145 references. */
146 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
148 use_operand_p use_p;
149 basic_block bbuse;
151 if (!is_gimple_debug (stmt))
152 continue;
154 gcc_assert (gimple_debug_bind_p (stmt));
156 bbuse = gimple_bb (stmt);
158 if ((bbuse == bbphi
159 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
160 && !(bbuse == bbdef
161 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
163 if (new_def)
164 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
165 SET_USE (use_p, new_def);
166 else
168 gimple_debug_bind_reset_value (stmt);
169 update_stmt (stmt);
175 /* Adjust debug stmts as scheduled before. */
177 static void
178 adjust_vec_debug_stmts (void)
180 if (!MAY_HAVE_DEBUG_STMTS)
181 return;
183 gcc_assert (adjust_vec.exists ());
185 while (!adjust_vec.is_empty ())
187 adjust_debug_stmts_now (&adjust_vec.last ());
188 adjust_vec.pop ();
191 adjust_vec.release ();
194 /* Adjust any debug stmts that referenced FROM values to use the
195 loop-closed TO, if the references are dominated by BB and not by
196 the definition of FROM. If adjust_vec is non-NULL, adjustments
197 will be postponed until adjust_vec_debug_stmts is called. */
199 static void
200 adjust_debug_stmts (tree from, tree to, basic_block bb)
202 adjust_info ai;
204 if (MAY_HAVE_DEBUG_STMTS
205 && TREE_CODE (from) == SSA_NAME
206 && ! virtual_operand_p (from))
208 ai.from = from;
209 ai.to = to;
210 ai.bb = bb;
212 if (adjust_vec.exists ())
213 adjust_vec.safe_push (ai);
214 else
215 adjust_debug_stmts_now (&ai);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
222 transformations. */
224 static void
225 adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
227 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
229 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
231 if (MAY_HAVE_DEBUG_STMTS)
232 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
233 gimple_bb (update_phi));
237 /* Update the PHI nodes of NEW_LOOP.
239 NEW_LOOP is a duplicate of ORIG_LOOP.
240 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
241 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
242 executes before it. */
244 static void
245 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
246 struct loop *new_loop, bool after)
248 tree new_ssa_name;
249 gimple phi_new, phi_orig;
250 tree def;
251 edge orig_loop_latch = loop_latch_edge (orig_loop);
252 edge orig_entry_e = loop_preheader_edge (orig_loop);
253 edge new_loop_exit_e = single_exit (new_loop);
254 edge new_loop_entry_e = loop_preheader_edge (new_loop);
255 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
256 gimple_stmt_iterator gsi_new, gsi_orig;
259 step 1. For each loop-header-phi:
260 Add the first phi argument for the phi in NEW_LOOP
261 (the one associated with the entry of NEW_LOOP)
263 step 2. For each loop-header-phi:
264 Add the second phi argument for the phi in NEW_LOOP
265 (the one associated with the latch of NEW_LOOP)
267 step 3. Update the phis in the successor block of NEW_LOOP.
269 case 1: NEW_LOOP was placed before ORIG_LOOP:
270 The successor block of NEW_LOOP is the header of ORIG_LOOP.
271 Updating the phis in the successor block can therefore be done
272 along with the scanning of the loop header phis, because the
273 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
274 phi nodes, organized in the same order.
276 case 2: NEW_LOOP was placed after ORIG_LOOP:
277 The successor block of NEW_LOOP is the original exit block of
278 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
279 We postpone updating these phis to a later stage (when
280 loop guards are added).
284 /* Scan the phis in the headers of the old and new loops
285 (they are organized in exactly the same order). */
287 for (gsi_new = gsi_start_phis (new_loop->header),
288 gsi_orig = gsi_start_phis (orig_loop->header);
289 !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
290 gsi_next (&gsi_new), gsi_next (&gsi_orig))
292 source_location locus;
293 phi_new = gsi_stmt (gsi_new);
294 phi_orig = gsi_stmt (gsi_orig);
296 /* step 1. */
297 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
298 locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
299 add_phi_arg (phi_new, def, new_loop_entry_e, locus);
301 /* step 2. */
302 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
303 locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
304 if (TREE_CODE (def) != SSA_NAME)
305 continue;
307 new_ssa_name = get_current_def (def);
308 if (!new_ssa_name)
310 /* This only happens if there are no definitions
311 inside the loop. use the phi_result in this case. */
312 new_ssa_name = PHI_RESULT (phi_new);
315 /* An ordinary ssa name defined in the loop. */
316 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
318 /* Drop any debug references outside the loop, if they would
319 become ill-formed SSA. */
320 adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
322 /* step 3 (case 1). */
323 if (!after)
325 gcc_assert (new_loop_exit_e == orig_entry_e);
326 adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332 /* Update PHI nodes for a guard of the LOOP.
334 Input:
335 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
336 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
337 originates from the guard-bb, skips LOOP and reaches the (unique) exit
338 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
339 We denote this bb NEW_MERGE_BB because before the guard code was added
340 it had a single predecessor (the LOOP header), and now it became a merge
341 point of two paths - the path that ends with the LOOP exit-edge, and
342 the path that ends with GUARD_EDGE.
343 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
344 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
346 ===> The CFG before the guard-code was added:
347 LOOP_header_bb:
348 loop_body
349 if (exit_loop) goto update_bb
350 else goto LOOP_header_bb
351 update_bb:
353 ==> The CFG after the guard-code was added:
354 guard_bb:
355 if (LOOP_guard_condition) goto new_merge_bb
356 else goto LOOP_header_bb
357 LOOP_header_bb:
358 loop_body
359 if (exit_loop_condition) goto new_merge_bb
360 else goto LOOP_header_bb
361 new_merge_bb:
362 goto update_bb
363 update_bb:
365 ==> The CFG after this function:
366 guard_bb:
367 if (LOOP_guard_condition) goto new_merge_bb
368 else goto LOOP_header_bb
369 LOOP_header_bb:
370 loop_body
371 if (exit_loop_condition) goto new_exit_bb
372 else goto LOOP_header_bb
373 new_exit_bb:
374 new_merge_bb:
375 goto update_bb
376 update_bb:
378 This function:
379 1. creates and updates the relevant phi nodes to account for the new
380 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
381 1.1. Create phi nodes at NEW_MERGE_BB.
382 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
383 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
384 2. preserves loop-closed-ssa-form by creating the required phi nodes
385 at the exit of LOOP (i.e, in NEW_EXIT_BB).
387 There are two flavors to this function:
389 slpeel_update_phi_nodes_for_guard1:
390 Here the guard controls whether we enter or skip LOOP, where LOOP is a
391 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
392 for variables that have phis in the loop header.
394 slpeel_update_phi_nodes_for_guard2:
395 Here the guard controls whether we enter or skip LOOP, where LOOP is an
396 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
397 for variables that have phis in the loop exit.
399 I.E., the overall structure is:
401 loop1_preheader_bb:
402 guard1 (goto loop1/merge1_bb)
403 loop1
404 loop1_exit_bb:
405 guard2 (goto merge1_bb/merge2_bb)
406 merge1_bb
407 loop2
408 loop2_exit_bb
409 merge2_bb
410 next_bb
412 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
413 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
414 that have phis in loop1->header).
416 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
417 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
418 that have phis in next_bb). It also adds some of these phis to
419 loop1_exit_bb.
421 slpeel_update_phi_nodes_for_guard1 is always called before
422 slpeel_update_phi_nodes_for_guard2. They are both needed in order
423 to create correct data-flow and loop-closed-ssa-form.
425 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
426 that change between iterations of a loop (and therefore have a phi-node
427 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
428 phis for variables that are used out of the loop (and therefore have
429 loop-closed exit phis). Some variables may be both updated between
430 iterations and used after the loop. This is why in loop1_exit_bb we
431 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
432 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
434 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
435 an original loop. i.e., we have:
437 orig_loop
438 guard_bb (goto LOOP/new_merge)
439 new_loop <-- LOOP
440 new_exit
441 new_merge
442 next_bb
444 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
445 have:
447 new_loop
448 guard_bb (goto LOOP/new_merge)
449 orig_loop <-- LOOP
450 new_exit
451 new_merge
452 next_bb
454    The SSA names defined in the original loop have a current
455    reaching definition that records the corresponding new
456    ssa-name used in the new duplicated loop copy.
459 /* Function slpeel_update_phi_nodes_for_guard1
461 Input:
462 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
463 - DEFS - a bitmap of ssa names to mark new names for which we recorded
464 information.
466 In the context of the overall structure, we have:
468 loop1_preheader_bb:
469 guard1 (goto loop1/merge1_bb)
470 LOOP-> loop1
471 loop1_exit_bb:
472 guard2 (goto merge1_bb/merge2_bb)
473 merge1_bb
474 loop2
475 loop2_exit_bb
476 merge2_bb
477 next_bb
479 For each name updated between loop iterations (i.e - for each name that has
480 an entry (loop-header) phi in LOOP) we create a new phi in:
481 1. merge1_bb (to account for the edge from guard1)
482 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
485 static void
486 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
487 bool is_new_loop, basic_block *new_exit_bb)
489 gimple orig_phi, new_phi;
490 gimple update_phi, update_phi2;
491 tree guard_arg, loop_arg;
492 basic_block new_merge_bb = guard_edge->dest;
493 edge e = EDGE_SUCC (new_merge_bb, 0);
494 basic_block update_bb = e->dest;
495 basic_block orig_bb = loop->header;
496 edge new_exit_e;
497 tree current_new_name;
498 gimple_stmt_iterator gsi_orig, gsi_update;
500 /* Create new bb between loop and new_merge_bb. */
501 *new_exit_bb = split_edge (single_exit (loop));
503 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
505 for (gsi_orig = gsi_start_phis (orig_bb),
506 gsi_update = gsi_start_phis (update_bb);
507 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
508 gsi_next (&gsi_orig), gsi_next (&gsi_update))
510 source_location loop_locus, guard_locus;
511 tree new_res;
512 orig_phi = gsi_stmt (gsi_orig);
513 update_phi = gsi_stmt (gsi_update);
515 /** 1. Handle new-merge-point phis **/
517 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
518 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
519 new_phi = create_phi_node (new_res, new_merge_bb);
521 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
522 of LOOP. Set the two phi args in NEW_PHI for these edges: */
523 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
524 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
525 EDGE_SUCC (loop->latch,
526 0));
527 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
528 guard_locus
529 = gimple_phi_arg_location_from_edge (orig_phi,
530 loop_preheader_edge (loop));
532 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
533 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
535 /* 1.3. Update phi in successor block. */
536 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
537 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
538 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
539 update_phi2 = new_phi;
542 /** 2. Handle loop-closed-ssa-form phis **/
544 if (virtual_operand_p (PHI_RESULT (orig_phi)))
545 continue;
547 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
548 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
549 new_phi = create_phi_node (new_res, *new_exit_bb);
551 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
552 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
554 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
555 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
556 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
557 PHI_RESULT (new_phi));
559 /* 2.4. Record the newly created name with set_current_def.
560 We want to find a name such that
561 name = get_current_def (orig_loop_name)
562 and to set its current definition as follows:
563 set_current_def (name, new_phi_name)
565 If LOOP is a new loop then loop_arg is already the name we're
566 looking for. If LOOP is the original loop, then loop_arg is
567 the orig_loop_name and the relevant name is recorded in its
568 current reaching definition. */
569 if (is_new_loop)
570 current_new_name = loop_arg;
571 else
573 current_new_name = get_current_def (loop_arg);
574 /* current_def is not available only if the variable does not
575 change inside the loop, in which case we also don't care
576 about recording a current_def for it because we won't be
577 trying to create loop-exit-phis for it. */
578 if (!current_new_name)
579 continue;
581 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
583 set_current_def (current_new_name, PHI_RESULT (new_phi));
588 /* Function slpeel_update_phi_nodes_for_guard2
590 Input:
591 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
593 In the context of the overall structure, we have:
595 loop1_preheader_bb:
596 guard1 (goto loop1/merge1_bb)
597 loop1
598 loop1_exit_bb:
599 guard2 (goto merge1_bb/merge2_bb)
600 merge1_bb
601 LOOP-> loop2
602 loop2_exit_bb
603 merge2_bb
604 next_bb
606 For each name used out side the loop (i.e - for each name that has an exit
607 phi in next_bb) we create a new phi in:
608 1. merge2_bb (to account for the edge from guard_bb)
609 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
610 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
611 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
614 static void
615 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
616 bool is_new_loop, basic_block *new_exit_bb)
618 gimple orig_phi, new_phi;
619 gimple update_phi, update_phi2;
620 tree guard_arg, loop_arg;
621 basic_block new_merge_bb = guard_edge->dest;
622 edge e = EDGE_SUCC (new_merge_bb, 0);
623 basic_block update_bb = e->dest;
624 edge new_exit_e;
625 tree orig_def, orig_def_new_name;
626 tree new_name, new_name2;
627 tree arg;
628 gimple_stmt_iterator gsi;
630 /* Create new bb between loop and new_merge_bb. */
631 *new_exit_bb = split_edge (single_exit (loop));
633 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
635 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
637 tree new_res;
638 update_phi = gsi_stmt (gsi);
639 orig_phi = update_phi;
640 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
641 /* This loop-closed-phi actually doesn't represent a use
642 out of the loop - the phi arg is a constant. */
643 if (TREE_CODE (orig_def) != SSA_NAME)
644 continue;
645 orig_def_new_name = get_current_def (orig_def);
646 arg = NULL_TREE;
648 /** 1. Handle new-merge-point phis **/
650 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
651 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
652 new_phi = create_phi_node (new_res, new_merge_bb);
654 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
655 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
656 new_name = orig_def;
657 new_name2 = NULL_TREE;
658 if (orig_def_new_name)
660 new_name = orig_def_new_name;
661 /* Some variables have both loop-entry-phis and loop-exit-phis.
662 Such variables were given yet newer names by phis placed in
663 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
664 new_name2 = get_current_def (get_current_def (orig_name)). */
665 new_name2 = get_current_def (new_name);
668 if (is_new_loop)
670 guard_arg = orig_def;
671 loop_arg = new_name;
673 else
675 guard_arg = new_name;
676 loop_arg = orig_def;
678 if (new_name2)
679 guard_arg = new_name2;
681 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
682 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
684 /* 1.3. Update phi in successor block. */
685 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
686 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
687 update_phi2 = new_phi;
690 /** 2. Handle loop-closed-ssa-form phis **/
692 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
693 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
694 new_phi = create_phi_node (new_res, *new_exit_bb);
696 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
697 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
699 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
700 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
701 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
702 PHI_RESULT (new_phi));
705 /** 3. Handle loop-closed-ssa-form phis for first loop **/
707 /* 3.1. Find the relevant names that need an exit-phi in
708 GUARD_BB, i.e. names for which
709 slpeel_update_phi_nodes_for_guard1 had not already created a
710 phi node. This is the case for names that are used outside
711 the loop (and therefore need an exit phi) but are not updated
712 across loop iterations (and therefore don't have a
713 loop-header-phi).
715 slpeel_update_phi_nodes_for_guard1 is responsible for
716 creating loop-exit phis in GUARD_BB for names that have a
717 loop-header-phi. When such a phi is created we also record
718 the new name in its current definition. If this new name
719 exists, then guard_arg was set to this new name (see 1.2
720 above). Therefore, if guard_arg is not this new name, this
721 is an indication that an exit-phi in GUARD_BB was not yet
722 created, so we take care of it here. */
723 if (guard_arg == new_name2)
724 continue;
725 arg = guard_arg;
727 /* 3.2. Generate new phi node in GUARD_BB: */
728 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
729 new_phi = create_phi_node (new_res, guard_edge->src);
731 /* 3.3. GUARD_BB has one incoming edge: */
732 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
733 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
734 UNKNOWN_LOCATION);
736 /* 3.4. Update phi in successor of GUARD_BB: */
737 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
738 == guard_arg);
739 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
740 PHI_RESULT (new_phi));
745 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
746 that starts at zero, increases by one and its limit is NITERS.
748 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
750 void
751 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
753 tree indx_before_incr, indx_after_incr;
754 gimple cond_stmt;
755 gimple orig_cond;
756 edge exit_edge = single_exit (loop);
757 gimple_stmt_iterator loop_cond_gsi;
758 gimple_stmt_iterator incr_gsi;
759 bool insert_after;
760 tree init = build_int_cst (TREE_TYPE (niters), 0);
761 tree step = build_int_cst (TREE_TYPE (niters), 1);
762 LOC loop_loc;
763 enum tree_code code;
765 orig_cond = get_loop_exit_condition (loop);
766 gcc_assert (orig_cond);
767 loop_cond_gsi = gsi_for_stmt (orig_cond);
769 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
770 create_iv (init, step, NULL_TREE, loop,
771 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
773 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
774 true, NULL_TREE, true,
775 GSI_SAME_STMT);
776 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
777 true, GSI_SAME_STMT);
779 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
780 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
781 NULL_TREE);
783 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
785 /* Remove old loop exit test: */
786 gsi_remove (&loop_cond_gsi, true);
787 free_stmt_vec_info (orig_cond);
789 loop_loc = find_loop_location (loop);
790 if (dump_enabled_p ())
792 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOC)
793 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOC_FILE (loop_loc),
794 LOC_LINE (loop_loc));
795 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
797 loop->nb_iterations = niters;
801 /* Given LOOP this function generates a new copy of it and puts it
802 on E which is either the entry or exit of LOOP. */
804 struct loop *
805 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
807 struct loop *new_loop;
808 basic_block *new_bbs, *bbs;
809 bool at_exit;
810 bool was_imm_dom;
811 basic_block exit_dest;
812 gimple phi;
813 tree phi_arg;
814 edge exit, new_exit;
815 gimple_stmt_iterator gsi;
817 at_exit = (e == single_exit (loop));
818 if (!at_exit && e != loop_preheader_edge (loop))
819 return NULL;
821 bbs = get_loop_body (loop);
823 /* Check whether duplication is possible. */
824 if (!can_copy_bbs_p (bbs, loop->num_nodes))
826 free (bbs);
827 return NULL;
830 /* Generate new loop structure. */
831 new_loop = duplicate_loop (loop, loop_outer (loop));
832 if (!new_loop)
834 free (bbs);
835 return NULL;
838 exit_dest = single_exit (loop)->dest;
839 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
840 exit_dest) == loop->header ?
841 true : false);
843 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
845 exit = single_exit (loop);
846 copy_bbs (bbs, loop->num_nodes, new_bbs,
847 &exit, 1, &new_exit, NULL,
848 e->src);
850 /* Duplicating phi args at exit bbs as coming
851 also from exit of duplicated loop. */
852 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
854 phi = gsi_stmt (gsi);
855 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
856 if (phi_arg)
858 edge new_loop_exit_edge;
859 source_location locus;
861 locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
862 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
863 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
864 else
865 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
867 add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
871 if (at_exit) /* Add the loop copy at exit. */
873 redirect_edge_and_branch_force (e, new_loop->header);
874 PENDING_STMT (e) = NULL;
875 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
876 if (was_imm_dom)
877 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
879 else /* Add the copy at entry. */
881 edge new_exit_e;
882 edge entry_e = loop_preheader_edge (loop);
883 basic_block preheader = entry_e->src;
885 if (!flow_bb_inside_loop_p (new_loop,
886 EDGE_SUCC (new_loop->header, 0)->dest))
887 new_exit_e = EDGE_SUCC (new_loop->header, 0);
888 else
889 new_exit_e = EDGE_SUCC (new_loop->header, 1);
891 redirect_edge_and_branch_force (new_exit_e, loop->header);
892 PENDING_STMT (new_exit_e) = NULL;
893 set_immediate_dominator (CDI_DOMINATORS, loop->header,
894 new_exit_e->src);
896 /* We have to add phi args to the loop->header here as coming
897 from new_exit_e edge. */
898 for (gsi = gsi_start_phis (loop->header);
899 !gsi_end_p (gsi);
900 gsi_next (&gsi))
902 phi = gsi_stmt (gsi);
903 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
904 if (phi_arg)
905 add_phi_arg (phi, phi_arg, new_exit_e,
906 gimple_phi_arg_location_from_edge (phi, entry_e));
909 redirect_edge_and_branch_force (entry_e, new_loop->header);
910 PENDING_STMT (entry_e) = NULL;
911 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
914 free (new_bbs);
915 free (bbs);
917 return new_loop;
921 /* Given the condition statement COND, put it as the last statement
922 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
923 Assumes that this is the single exit of the guarded loop.
924 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
926 static edge
927 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
928 gimple_seq cond_expr_stmt_list,
929 basic_block exit_bb, basic_block dom_bb,
930 int probability)
932 gimple_stmt_iterator gsi;
933 edge new_e, enter_e;
934 gimple cond_stmt;
935 gimple_seq gimplify_stmt_list = NULL;
937 enter_e = EDGE_SUCC (guard_bb, 0);
938 enter_e->flags &= ~EDGE_FALLTHRU;
939 enter_e->flags |= EDGE_FALSE_VALUE;
940 gsi = gsi_last_bb (guard_bb);
942 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
943 NULL_TREE);
944 if (gimplify_stmt_list)
945 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
946 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
947 if (cond_expr_stmt_list)
948 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
950 gsi = gsi_last_bb (guard_bb);
951 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
953 /* Add new edge to connect guard block to the merge/loop-exit block. */
954 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
956 new_e->count = guard_bb->count;
957 new_e->probability = probability;
958 new_e->count = apply_probability (enter_e->count, probability);
959 enter_e->count -= new_e->count;
960 enter_e->probability = inverse_probability (probability);
961 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
962 return new_e;
966 /* This function verifies that the following restrictions apply to LOOP:
967 (1) it is innermost
968 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
969 (3) it is single entry, single exit
970 (4) its exit condition is the last stmt in the header
971 (5) E is the entry/exit edge of LOOP.
974 bool
975 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
977 edge exit_e = single_exit (loop);
978 edge entry_e = loop_preheader_edge (loop);
979 gimple orig_cond = get_loop_exit_condition (loop);
980 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
982 if (need_ssa_update_p (cfun))
983 return false;
985 if (loop->inner
986 /* All loops have an outer scope; the only case loop->outer is NULL is for
987 the function itself. */
988 || !loop_outer (loop)
989 || loop->num_nodes != 2
990 || !empty_block_p (loop->latch)
991 || !single_exit (loop)
992 /* Verify that new loop exit condition can be trivially modified. */
993 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
994 || (e != exit_e && e != entry_e))
995 return false;
997 return true;
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP's exit
   block must guard SECOND_LOOP, whose preheader must be reachable both
   from that exit block and from the block preceding FIRST_LOOP.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.
   */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations. This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree *first_niters,
			 struct loop *loop,
			 unsigned int th,
			 int probability)
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the predecessor edge of BB_BEFORE_FIRST_LOOP twice: COND_BB
     will hold the run-time cost-model test, THEN_BB the recomputed
     iteration count.  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge(e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge(e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* Give COND_BB a false edge that bypasses THEN_BB.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  /* The edge left by the second split was a fallthru; retag it as the
     true edge of the new condition.  */
  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  /* Distribute COND_BB's profile across the two outgoing edges
     according to PROBABILITY.  */
  e_true->probability = probability;
  e_false->probability = inverse_probability (probability);
  e_true->count = apply_probability (cond_bb->count, probability);
  e_false->count = cond_bb->count - e_true->count;
  then_bb->frequency = EDGE_FREQUENCY (e_true);
  then_bb->count = e_true->count;

  e_fallthru = EDGE_SUCC (then_bb, 0);
  e_fallthru->count = then_bb->count;

  /* Build and insert the cost check: scalar_loop_iters <= TH.  */
  gsi = gsi_last_bb (cond_bb);
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
		 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
					   NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* In THEN_BB materialize the full (unchanged) scalar iteration
     count.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  /* Merge the two values with a PHI: the adjusted count if the cost
     check fired, the caller's *FIRST_NITERS otherwise, and hand the
     PHI result back through FIRST_NITERS.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);

  *first_niters = PHI_RESULT (newphi);
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
                          during versioning and hence needs to occur during
                          prologue generation or whether cost model check
                          has not occurred during prologue generation and hence
                          needs to occur during epilogue generation.
   - BOUND1 is the upper bound on number of iterations of the first loop (if known)
   - BOUND2 is the upper bound on number of iterations of the second loop (if known)

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
   then the generated condition is combined with COND_EXPR and the
   statements in COND_EXPR_STMT_LIST are emitted together with it.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.  */

static struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
			       edge e, tree *first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability,
			       tree cond_expr, gimple_seq cond_expr_stmt_list,
			       int bound1, int bound2)
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  gimple_stmt_iterator gsi;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;
  /* There are many aspects to how likely the first loop is going to be executed.
     Without histogram we can't really do good job.  Simply set it to
     2/3, so the first loop is not reordered to the end of function and
     the hot path through stays short.  */
  int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
  int probability_of_second_loop;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
     in the exit bb and rename all the uses after the loop.  This simplifies
     the *guard[12] routines, which assume loop closed SSA form for all PHIs
     (but normally loop closed SSA form doesn't require virtual PHIs to be
     in the same form).  Doing this early simplifies the checking what
     uses should be renamed.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	gimple phi = gsi_stmt (gsi);
	/* Look for an existing virtual PHI in the exit block.  */
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	    /* None found: materialize one and redirect all uses of the
	       latch VOP that are outside the loop header to it.  */
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
	    gimple new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi && gimple_bb (stmt) != loop->header)
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	break;

  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:  */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
      loop_loc = find_loop_location (loop);
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                       "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;

  if (MAY_HAVE_DEBUG_STMTS)
      gcc_assert (!adjust_vec.exists ());
      vec_stack_alloc (adjust_info, adjust_vec, 32);

  if (e == exit_e)
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
  else
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;

  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);

  /* 2.  Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
         generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for prologue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
         this function is invoked for epilogue generation
         and the cost model check needs to be done at run
         time. This check is combined with any pre-existing
         check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:  */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  probability_of_second_loop = (inverse_probability (first_guard_probability)
			        + combine_probabilities (second_guard_probability,
                                                         first_guard_probability));
  /* Theoretically preheader edge of first loop and exit edge should have
     same frequencies.  Loop exit probablities are however easy to get wrong.
     It is safer to copy value from original loop entry.  */
  bb_before_second_loop->frequency
     = apply_probability (bb_before_first_loop->frequency,
			  probability_of_second_loop);
  bb_before_second_loop->count
     = apply_probability (bb_before_first_loop->count,
			  probability_of_second_loop);
  single_succ_edge (bb_before_second_loop)->count
     = bb_before_second_loop->count;

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));
      if (check_profitability)
	  tree scalar_loop_iters
	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
			     (loop_vec_info_for_loop (loop)));
	  cost_pre_condition =
	    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
			 build_int_cst (TREE_TYPE (scalar_loop_iters), th));

	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				       cost_pre_condition, pre_condition);
      if (cond_expr)
	  /* Combine with the caller's versioning condition so only one
	     guard is emitted.  */
	  pre_condition =
	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
			 pre_condition,
			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
				      cond_expr));

  /* Prologue peeling.  */
  else
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th, first_guard_probability);

      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
		     build_int_cst (TREE_TYPE (*first_niters), 0));

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
				  cond_expr_stmt_list,
				  bb_before_second_loop, bb_before_first_loop,
				  inverse_probability (first_guard_probability));
  scale_loop_profile (first_loop, first_guard_probability,
		      check_profitability && (int)th > bound1 ? th : bound1);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb);

  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:  */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop,
				  inverse_probability (second_guard_probability));
  scale_loop_profile (second_loop, probability_of_second_loop, bound2);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);

  delete_update_ssa ();

  adjust_vec_debug_stmts ();

  return new_loop;
1476 /* Function vect_get_loop_location.
1478 Extract the location of the loop in the source code.
1479 If the loop is not well formed for vectorization, an estimated
1480 location is calculated.
1481 Return the loop location if succeed and NULL if not. */
1484 find_loop_location (struct loop *loop)
1486 gimple stmt = NULL;
1487 basic_block bb;
1488 gimple_stmt_iterator si;
1490 if (!loop)
1491 return UNKNOWN_LOC;
1493 stmt = get_loop_exit_condition (loop);
1495 if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1496 return gimple_location (stmt);
1498 /* If we got here the loop is probably not "well formed",
1499 try to estimate the loop location */
1501 if (!loop->header)
1502 return UNKNOWN_LOC;
1504 bb = loop->header;
1506 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1508 stmt = gsi_stmt (si);
1509 if (gimple_location (stmt) != UNKNOWN_LOC)
1510 return gimple_location (stmt);
1513 return UNKNOWN_LOC;
1517 /* This function builds ni_name = number of iterations loop executes
1518 on the loop preheader. If SEQ is given the stmt is instead emitted
1519 there. */
1521 static tree
1522 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1524 tree ni_name, var;
1525 gimple_seq stmts = NULL;
1526 edge pe;
1527 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1528 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1530 var = create_tmp_var (TREE_TYPE (ni), "niters");
1531 ni_name = force_gimple_operand (ni, &stmts, false, var);
1533 pe = loop_preheader_edge (loop);
1534 if (stmts)
1536 if (seq)
1537 gimple_seq_add_seq (&seq, stmts);
1538 else
1540 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1541 gcc_assert (!new_bb);
1545 return ni_name;
1549 /* This function generates the following statements:
1551 ni_name = number of iterations loop executes
1552 ratio = ni_name / vf
1553 ratio_mult_vf_name = ratio * vf
1555 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1556 if that is non-NULL. */
1558 static void
1559 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1560 tree *ni_name_ptr,
1561 tree *ratio_mult_vf_name_ptr,
1562 tree *ratio_name_ptr,
1563 gimple_seq cond_expr_stmt_list)
1566 edge pe;
1567 basic_block new_bb;
1568 gimple_seq stmts;
1569 tree ni_name, ni_minus_gap_name;
1570 tree var;
1571 tree ratio_name;
1572 tree ratio_mult_vf_name;
1573 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1574 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1575 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1576 tree log_vf;
1578 pe = loop_preheader_edge (loop);
1580 /* Generate temporary variable that contains
1581 number of iterations loop executes. */
1583 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1584 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1586 /* If epilogue loop is required because of data accesses with gaps, we
1587 subtract one iteration from the total number of iterations here for
1588 correct calculation of RATIO. */
1589 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1591 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1592 ni_name,
1593 build_one_cst (TREE_TYPE (ni_name)));
1594 if (!is_gimple_val (ni_minus_gap_name))
1596 var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1598 stmts = NULL;
1599 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1600 true, var);
1601 if (cond_expr_stmt_list)
1602 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1603 else
1605 pe = loop_preheader_edge (loop);
1606 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1607 gcc_assert (!new_bb);
1611 else
1612 ni_minus_gap_name = ni_name;
1614 /* Create: ratio = ni >> log2(vf) */
1616 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1617 ni_minus_gap_name, log_vf);
1618 if (!is_gimple_val (ratio_name))
1620 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1622 stmts = NULL;
1623 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1624 if (cond_expr_stmt_list)
1625 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1626 else
1628 pe = loop_preheader_edge (loop);
1629 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1630 gcc_assert (!new_bb);
1634 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1636 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1637 ratio_name, log_vf);
1638 if (!is_gimple_val (ratio_mult_vf_name))
1640 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1642 stmts = NULL;
1643 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1644 true, var);
1645 if (cond_expr_stmt_list)
1646 gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1647 else
1649 pe = loop_preheader_edge (loop);
1650 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1651 gcc_assert (!new_bb);
1655 *ni_name_ptr = ni_name;
1656 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1657 *ratio_name_ptr = ratio_name;
1659 return;
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      tree access_fn = NULL;
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);

      /* Skip virtual phi's. The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (virtual_operand_p (PHI_RESULT (phi)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "virtual phi. skip.");
	  continue;

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "reduc phi. skip.");
	  continue;

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "No Access function.");
	  return false;

      STRIP_NOPS (access_fn);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Access function of PHI: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);

      /* An IV with no evolution in this loop cannot be advanced.  */
      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
	  return false;

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	return false;

  return true;
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gimple phi, phi1;
  gimple_stmt_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header PHIs and the update_bb PHIs in lockstep; they are
     assumed to be in the same order (Assumption 2 above).  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;
      stmt_vec_info stmt_info;

      phi = gsi_stmt (gsi);
      phi1 = gsi_stmt (gsi1);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);

      /* Skip virtual phi's.  */
      if (virtual_operand_p (PHI_RESULT (phi)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "virtual phi. skip.");
	  continue;

      /* Skip reduction phis.  */
      stmt_info = vinfo_for_stmt (phi);
      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "reduc phi. skip.");
	  continue;

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      /* Compute ni = init + niters * step, using pointer arithmetic for
	 pointer-typed IVs.  */
      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      /* The advanced IV value is materialized in the loop's exit block.  */
      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
					  true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1882 /* Function vect_do_peeling_for_loop_bound
1884 Peel the last iterations of the loop represented by LOOP_VINFO.
1885 The peeled iterations form a new epilog loop. Given that the loop now
1886 iterates NITERS times, the new epilog loop iterates
1887 NITERS % VECTORIZATION_FACTOR times.
1889 The original loop will later be made to iterate
1890 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1892 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1893 test. */
1895 void
1896 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1897 unsigned int th, bool check_profitability)
1899 tree ni_name, ratio_mult_vf_name;
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 struct loop *new_loop;
1902 edge update_e;
1903 basic_block preheader;
1904 int loop_num;
1905 int max_iter;
1906 tree cond_expr = NULL_TREE;
1907 gimple_seq cond_expr_stmt_list = NULL;
1909 if (dump_enabled_p ())
1910 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1911 "=== vect_do_peeling_for_loop_bound ===");
1913 initialize_original_copy_tables ();
1915 /* Generate the following variables on the preheader of original loop:
1917 ni_name = number of iteration the original loop executes
1918 ratio = ni_name / vf
1919 ratio_mult_vf_name = ratio * vf */
1920 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1921 &ratio_mult_vf_name, ratio,
1922 cond_expr_stmt_list);
1924 loop_num = loop->num;
1926 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1927 &ratio_mult_vf_name, ni_name, false,
1928 th, check_profitability,
1929 cond_expr, cond_expr_stmt_list,
1930 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1931 gcc_assert (new_loop);
1932 gcc_assert (loop_num == loop->num);
1933 #ifdef ENABLE_CHECKING
1934 slpeel_verify_cfg_after_peeling (loop, new_loop);
1935 #endif
1937 /* A guard that controls whether the new_loop is to be executed or skipped
1938 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1939 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1940 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1941 is on the path where the LOOP IVs are used and need to be updated. */
1943 preheader = loop_preheader_edge (new_loop)->src;
1944 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1945 update_e = EDGE_PRED (preheader, 0);
1946 else
1947 update_e = EDGE_PRED (preheader, 1);
1949 /* Update IVs of original loop as if they were advanced
1950 by ratio_mult_vf_name steps. */
1951 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1953 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1954 and this means N-2 loopback edge executions.
1956 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1957 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1958 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1959 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1960 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
1961 if (check_profitability)
1962 max_iter = MAX (max_iter, (int) th - 1);
1963 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
1964 dump_printf (MSG_OPTIMIZED_LOCATIONS,
1965 "Setting upper bound of nb iterations for epilogue "
1966 "loop to %d\n", max_iter);
1968 /* After peeling we have to reset scalar evolution analyzer. */
1969 scev_reset ();
1971 free_original_copy_tables ();
1975 /* Function vect_gen_niters_for_prolog_loop
1977 Set the number of iterations for the loop represented by LOOP_VINFO
1978 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1979 and the misalignment of DR - the data reference recorded in
1980 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1981 this loop, the data reference DR will refer to an aligned location.
1983 The following computation is generated:
1985 If the misalignment of DR is known at compile time:
1986 addr_mis = int mis = DR_MISALIGNMENT (dr);
1987 Else, compute address misalignment in bytes:
1988 addr_mis = addr & (vectype_align - 1)
1990 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1992 (elem_size = element type size; an element is the scalar element whose type
1993 is the inner type of the vectype)
1995 When the step of the data-ref in the loop is not 1 (as in interleaved data
1996 and SLP), the number of iterations of the prolog must be divided by the step
1997 (which is equal to the size of interleaved group).
1999 The above formulas assume that VF == number of elements in the vector. This
2000 may not hold when there are multiple-types in the loop.
2001 In this case, for some data-references in the loop the VF does not represent
2002 the number of elements that fit in the vector. Therefore, instead of VF we
2003 use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Required alignment of the unaligned data reference, in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* The number of iterations to peel was fixed at analysis time;
	 just emit it as a constant.  */
      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "known peeling = %d.", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      /* Misalignment is unknown at compile time: compute it at run time
	 from the data reference's start address.  */
      gimple_seq new_stmts = NULL;
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      /* For a negative step the first vector starts TYPE_VECTOR_SUBPARTS - 1
	 elements before the recorded base address.  */
      tree offset = negative
	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						&new_stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* The address computation may need statements of its own; they must
	 dominate the loop, so insert them on the preheader edge.  */
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size
	 (element size is a power of two, so divide by shifting).  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements;
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, iters);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2104 /* Function vect_update_init_of_dr
2106 NITERS iterations were peeled from LOOP. DR represents a data reference
2107 in LOOP. This function updates the information recorded in DR to
2108 account for the fact that the first NITERS iterations had already been
2109 executed. Specifically, it updates the OFFSET field of DR. */
2111 static void
2112 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2114 tree offset = DR_OFFSET (dr);
2116 niters = fold_build2 (MULT_EXPR, sizetype,
2117 fold_convert (sizetype, niters),
2118 fold_convert (sizetype, DR_STEP (dr)));
2119 offset = fold_build2 (PLUS_EXPR, sizetype,
2120 fold_convert (sizetype, offset), niters);
2121 DR_OFFSET (dr) = offset;
2125 /* Function vect_update_inits_of_drs
2127 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2128 This function updates the information recorded for the data references in
2129 the loop to account for the fact that the first NITERS iterations had
2130 already been executed. Specifically, it updates the initial_condition of
2131 the access_function of all the data_references in the loop. */
2133 static void
2134 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2136 unsigned int i;
2137 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2138 struct data_reference *dr;
2140 if (dump_enabled_p ())
2141 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2142 "=== vect_update_inits_of_dr ===");
2144 FOR_EACH_VEC_ELT (datarefs, i, dr)
2145 vect_update_init_of_dr (dr, niters);
2149 /* Function vect_do_peeling_for_alignment
2151 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2152 'niters' is set to the misalignment of one of the data references in the
2153 loop, thereby forcing it to refer to an aligned location at the beginning
2154 of the execution of this loop. The data reference for which we are
2155 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
			       unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  tree wide_prolog_niters;
  struct loop *new_loop;
  int max_iter;
  int bound = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
							   ni_name,
							   &bound);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
				   &niters_of_prolog_loop, ni_name, true,
				   th, check_profitability, NULL_TREE, NULL,
				   bound,
				   0);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif
  /* For vectorization factor N, we need to copy at most N-1 values
     for alignment and this means N-2 loopback edge executions.  */
  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
  if (check_profitability)
    max_iter = MAX (max_iter, (int) th - 1);
  record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
  dump_printf (MSG_OPTIMIZED_LOCATIONS,
               "Setting upper bound of nb iterations for prologue "
               "loop to %d\n", max_iter);

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
	TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
    wide_prolog_niters = niters_of_prolog_loop;
  else
    {
      /* vect_update_inits_of_drs does sizetype arithmetic with the peel
	 count, so widen it to sizetype first.  */
      gimple_seq seq = NULL;
      edge pe = loop_preheader_edge (loop);
      tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
      wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
                                                 var);
      if (seq)
	{
	  /* Insert stmt on loop preheader edge.  */
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}
    }

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2235 /* Function vect_create_cond_for_align_checks.
2237 Create a conditional expression that represents the alignment checks for
2238 all of data references (array element references) whose alignment must be
2239 checked at runtime.
2241 Input:
2242 COND_EXPR - input conditional expression. New conditions will be chained
2243 with logical AND operation.
2244 LOOP_VINFO - two fields of the loop information are used.
2245 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2246 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2248 Output:
2249 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2250 expression.
2251 The returned value is the conditional expression to be used in the if
2252 statement that controls which version of the loop gets executed at runtime.
2254 The algorithm makes two assumptions:
2255 1) The number of bytes "n" in a vector is a power of 2.
2256 2) An address "a" is aligned if a%n is zero and that this
2257 test can be done as a&(n-1) == 0. For example, for 16
2258 byte vectors the test is a&0xf == 0. */
2260 static void
2261 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2262 tree *cond_expr,
2263 gimple_seq *cond_expr_stmt_list)
2265 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2266 vec<gimple> may_misalign_stmts
2267 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2268 gimple ref_stmt;
2269 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2270 tree mask_cst;
2271 unsigned int i;
2272 tree int_ptrsize_type;
2273 char tmp_name[20];
2274 tree or_tmp_name = NULL_TREE;
2275 tree and_tmp_name;
2276 gimple and_stmt;
2277 tree ptrsize_zero;
2278 tree part_cond_expr;
2280 /* Check that mask is one less than a power of 2, i.e., mask is
2281 all zeros followed by all ones. */
2282 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2284 int_ptrsize_type = signed_type_for (ptr_type_node);
2286 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2287 of the first vector of the i'th data reference. */
2289 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
2291 gimple_seq new_stmt_list = NULL;
2292 tree addr_base;
2293 tree addr_tmp_name;
2294 tree new_or_tmp_name;
2295 gimple addr_stmt, or_stmt;
2296 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2297 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2298 bool negative = tree_int_cst_compare
2299 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2300 tree offset = negative
2301 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2303 /* create: addr_tmp = (int)(address_of_first_vector) */
2304 addr_base =
2305 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2306 offset, loop);
2307 if (new_stmt_list != NULL)
2308 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2310 sprintf (tmp_name, "addr2int%d", i);
2311 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2312 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2313 addr_base, NULL_TREE);
2314 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2316 /* The addresses are OR together. */
2318 if (or_tmp_name != NULL_TREE)
2320 /* create: or_tmp = or_tmp | addr_tmp */
2321 sprintf (tmp_name, "orptrs%d", i);
2322 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2323 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2324 new_or_tmp_name,
2325 or_tmp_name, addr_tmp_name);
2326 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2327 or_tmp_name = new_or_tmp_name;
2329 else
2330 or_tmp_name = addr_tmp_name;
2332 } /* end for i */
2334 mask_cst = build_int_cst (int_ptrsize_type, mask);
2336 /* create: and_tmp = or_tmp & mask */
2337 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2339 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2340 or_tmp_name, mask_cst);
2341 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2343 /* Make and_tmp the left operand of the conditional test against zero.
2344 if and_tmp has a nonzero bit then some address is unaligned. */
2345 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2346 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2347 and_tmp_name, ptrsize_zero);
2348 if (*cond_expr)
2349 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2350 *cond_expr, part_cond_expr);
2351 else
2352 *cond_expr = part_cond_expr;
2356 /* Function vect_vfa_segment_size.
2358 Create an expression that computes the size of segment
2359 that will be accessed for a data reference. The functions takes into
2360 account that realignment loads may access one more vector.
2362 Input:
2363 DR: The data reference.
2364 LENGTH_FACTOR: segment length to consider.
2366 Return an expression whose value is the size of segment which will be
2367 accessed by DR. */
2369 static tree
2370 vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2372 tree segment_length;
2374 if (integer_zerop (DR_STEP (dr)))
2375 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2376 else
2377 segment_length = size_binop (MULT_EXPR,
2378 fold_convert (sizetype, DR_STEP (dr)),
2379 fold_convert (sizetype, length_factor));
2381 if (vect_supportable_dr_alignment (dr, false)
2382 == dr_explicit_realign_optimized)
2384 tree vector_size = TYPE_SIZE_UNIT
2385 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2387 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2389 return segment_length;
2393 /* Function vect_create_cond_for_alias_checks.
2395 Create a conditional expression that represents the run-time checks for
2396 overlapping of address ranges represented by a list of data references
2397 relations passed as input.
2399 Input:
2400 COND_EXPR - input conditional expression. New conditions will be chained
2401 with logical AND operation.
2402 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2403 to be checked.
2405 Output:
2406 COND_EXPR - conditional expression.
2407 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2408 expression.
2411 The returned value is the conditional expression to be used in the if
2412 statement that controls which version of the loop gets executed at runtime.
static void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
				   tree * cond_expr,
				   gimple_seq * cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  vec<ddr_p> may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree part_cond_expr, length_factor;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
     || (load_ptr_n + load_segment_length_n) <= store_ptr_n))  */

  if (may_alias_ddrs.is_empty ())
    return;

  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      struct data_reference *dr_a, *dr_b;
      gimple dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      gimple stmt_a, stmt_b;
      tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;

      /* For an interleaved group check the whole group, starting from its
	 first element.  */
      dr_a = DDR_A (ddr);
      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
	{
	  stmt_a = dr_group_first_a;
	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
	}

      dr_b = DDR_B (ddr);
      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
	{
	  stmt_b = dr_group_first_b;
	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
	}

      /* Statements computing the two base addresses accumulate into
	 COND_EXPR_STMT_LIST for the caller to emit.  */
      addr_base_a =
	vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
					      NULL_TREE, loop);
      addr_base_b =
	vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
					      NULL_TREE, loop);

      /* When the steps differ the vectorization factor alone does not bound
	 the accessed range; fall back to the full scalar iteration count.  */
      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
	length_factor = scalar_loop_iters;
      else
	length_factor = size_int (vect_factor);
      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "create runtime check for data references ");
	  dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_a));
	  dump_printf (MSG_OPTIMIZED_LOCATIONS, " and ");
	  dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, DR_REF (dr_b));
	}

      /* A negative step means the segment extends below the base address;
	 swap min/max so [min, max] is a proper interval.  */
      seg_a_min = addr_base_a;
      seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
      if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
	seg_a_min = seg_a_max, seg_a_max = addr_base_a;

      seg_b_min = addr_base_b;
      seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
      if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
	seg_b_min = seg_b_max, seg_b_max = addr_base_b;

      /* The two references are independent iff one segment ends before the
	 other begins.  */
      part_cond_expr =
      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	  fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
	  fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));

      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
		     "created %u versioning for alias checks.\n",
		     may_alias_ddrs.length ());
}
2521 /* Function vect_loop_versioning.
2523 If the loop has data references that may or may not be aligned or/and
2524 has data reference relations whose independence was not proven then
2525 two versions of the loop need to be generated, one which is vectorized
2526 and one which isn't. A test is then generated to control which of the
2527 loops is executed. The test checks for the alignment of all of the
2528 data references that may or may not be aligned. An additional
2529 sequence of runtime tests is generated for each pairs of DDRs whose
2530 independence was not proven. The vectorized version of loop is
2531 executed only if both alias and alignment tests are passed.
2533 The test generated to check which version of loop is executed
2534 is modified to also check for profitability as indicated by the
2535 cost model initially.
2537 The versioning precondition(s) are placed in *COND_EXPR and
2538 *COND_EXPR_STMT_LIST. */
void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block condition_bb;
  gimple_stmt_iterator gsi, cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gimple orig_phi, new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  /* Predict the vectorized version to be taken 4 out of 5 times.  */
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  if (check_profitability)
    {
      /* Guard the vectorized loop with niters > TH, the cost-model
	 profitability threshold.  */
      cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
			       build_int_cst (TREE_TYPE (scalar_loop_iters), th));
      cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					  is_gimple_condexpr, NULL_TREE);
    }

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  loop_version (loop, cond_expr, &condition_bb,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors). Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  /* Re-create the loop-closed-SSA exit phis in the new exit block and make
     the old merge-block phis take their results instead.  */
  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree new_res;
      orig_phi = gsi_stmt (gsi);
      new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
      new_phi = create_phi_node (new_res, new_exit_bb);
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e,
		   gimple_phi_arg_location_from_edge (orig_phi, e));
      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
    }

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  if (cond_expr_stmt_list)
    {
      /* Emit the statements that compute the versioning condition into the
	 block ending with the version-selecting COND_EXPR.  */
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
}