/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src)
          && (!rename_from_outer_loop || e->src != outer_loop->header))
        continue;
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
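
/* Illustrative sketch (not part of the original comment): for NITERS == n
   the loop exit test is effectively rewritten as

     indx = 0;
     loop:
       ...
       indx = indx + 1;
       if (indx >= n) goto exit;   <-- replaces the original exit test
       goto loop;

   where the IV is created by create_iv below, and the comparison code is
   flipped to LT_EXPR when the loop is exited on the false edge.  */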
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  loop->nb_iterations = niters;
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For each pair of PHI arguments on edges FROM and TO (in FROM->dest and
   TO->dest respectively), ensure that the TO->dest argument has its
   current_def set to that of the corresponding FROM->dest argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */
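
/* In the vectorizer this is used in two ways (see vect_do_peeling below):
   passing E == single_exit (LOOP) places the copy after LOOP so it can
   serve as an epilog, while passing the preheader edge places the copy
   before LOOP as a prolog.  */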
struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which
   uses should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);

            break;
          }
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
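
/* For instance (illustrative): an IV with scalar evolution {init, +, step}
   and loop-invariant step passes the checks below, while a second-order
   chrec such as {0, +, {1, +, 1}} is rejected because its evolution part
   is itself a chrec.  */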
bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution function is not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution function is a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */
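
/* Illustrative example (not from the original comment): an integer IV with
   initial value 5 and step 3 is advanced to ni = 5 + NITERS * 3; a pointer
   IV gets the same offset applied with fold_build_pointer_plus instead of
   PLUS_EXPR.  */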
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store upper bound (included) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
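
/* Worked example (illustrative, not from the original comment): for a V4SF
   access (VF == 4, elem_size == 4, vectype_align == 16) whose start address
   has addr_mis == 8 bytes, elem_misalign == 8 / 4 == 2 and
   prolog_niters == (4 - 2) & 3 == 2, i.e. two scalar iterations are peeled
   before the access becomes 16-byte aligned.  */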
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                &stmts, offset, loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
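
/* For example (illustrative): after peeling NITERS == 2 iterations of a
   data reference with DR_STEP == 4 bytes, DR_OFFSET grows by 2 * 4 == 8
   bytes, matching the update below.  */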
static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (included) of the number of iterations of the prolog loop.  VFM1 is
   the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is the
   threshold below which the scalar (rather than vectorized) loop will be
   executed.  This function stores the upper bound (included) of the result
   in BOUND_SCALAR.  */
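
/* For example (illustrative): without profitability checking, the scalar
   loop runs at most NITERS_PROLOG + VFM1 iterations, so with
   BOUND_PROLOG == 3 and VFM1 == 3 the stored *BOUND_SCALAR is 6.  */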
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */

void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                  niters,
                                  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf) */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
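  /* A quick check of the two forms (illustrative): with niters == 10 and
     vf == 4, 10 >> 2 == 2 and ((10 - 4) >> 2) + 1 == 2, so both yield the
     same two vector iterations whenever the computation of NITERS did not
     wrap around.  */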
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
                     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
                                  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
                                               ni_minus_gap,
                                               build_int_cst
                                                 (TREE_TYPE (niters), vf)),
                                  log_vf),
                     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */
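
/* Since VF is a power of two the multiplication below is emitted as a left
   shift, e.g. (illustrative) niters_vector == 3 and vf == 4 give
   3 << 2 == 12 scalar iterations covered by the vector loop.  */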
static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
                                  or with i_2 if no LCSSA phi is created
                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */
static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

       merge_bb:
         ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the result of the new PHI node.  */
static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL_TREE.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

     loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:

       guard_bb:
         if (cond)
           goto merge_bb;
         else
           goto epilog_loop;

       ;; fall_through_bb

     epilog_loop:
       header_b:
         i_3 = PHI<i_2, i_4>;
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto merge_bb;
       latch_b:
         goto header_b;

       merge_bb:
         ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

       exit_bb:
         i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */
static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
                                    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
         loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
        continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
        merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
         use the old arg.  */
      if (!guard_arg)
        guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
         the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
         loop_body
         if (exit_loop_cond) goto exit_bb
         else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
                              CHECK_PROFITABILITY is true.
   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
         if (prefer_scalar_loop) goto merge_bb_1
         else                    goto guard_bb_2

       guard_bb_2:
         if (skip_prolog) goto merge_bb_2
         else             goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
         prolog_body
         if (exit_prolog_cond) goto prolog_exit_bb
         else                  goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
         vector_body
         if (exit_vector_cond) goto vector_exit_bb
         else                  goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
         if (skip_epilog) goto merge_bb_3
         else             goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
         epilog_body
         if (exit_epilog_cond) goto merge_bb_3
         else                  goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
void
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
                 tree *niters_vector, int th, bool check_profitability,
                 bool niters_no_overflow)
{
  edge e, guard_e;
  tree type = TREE_TYPE (niters), guard_cond;
  basic_block guard_bb, guard_to;
  int prob_prolog, prob_vector, prob_epilog;
  int bound_prolog = 0, bound_scalar = 0, bound = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
                         || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (!prolog_peeling && !epilog_peeling)
    return;
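
  /* Note on the probabilities set up below (explanatory, not from the
     original sources): the vector loop is assumed to be entered 9 times
     out of 10, and a peeled prolog/epilog executing between 0 and vf - 1
     iterations is assumed to be entered with probability (vf - 1) / vf;
     vf == 2 is apparently treated as 3 so that this estimate stays above
     1/2.  */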
  prob_vector = 9 * REG_BR_PROB_BASE / 10;
  if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
    vf = 3;
  prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *first_loop = loop;
  create_lcssa_for_virtual_phi (loop);
  update_ssa (TODO_update_ssa_only_virtuals);

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }
  initialize_original_copy_tables ();

  /* Prolog loop may be skipped.  */
  bool skip_prolog = (prolog_peeling != 0);
  /* Skip to epilog if scalar loop may be preferred.  It's only used when
     we peel for epilog loop.  */
  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* Epilog loop must be executed if the number of iterations for epilog
     loop is known at compile time, otherwise we need to add a check at
     the end of vector loop and skip to the end of epilog loop.  */
  bool skip_epilog = (prolog_peeling < 0
                      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    skip_epilog = false;

  /* Record the anchor bb at which guard should be placed if scalar loop
     may be preferred.  */
  basic_block anchor = loop_preheader_edge (loop)->src;
  if (skip_vector)
    split_edge (loop_preheader_edge (loop));

  tree niters_prolog = build_int_cst (type, 0);
  source_location loop_loc = find_loop_location (loop);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  if (prolog_peeling)
    {
      e = loop_preheader_edge (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "loop can't be duplicated to preheader edge.\n");
          gcc_unreachable ();
        }
      /* Peel prolog and put it on preheader edge of loop.  */
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!prolog)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
                           "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
          gcc_unreachable ();
        }
      slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
      first_loop = prolog;
      reset_original_copy_tables ();

      /* Generate and update the number of iterations for prolog loop.  */
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
                                                   &bound_prolog);
      slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);

      /* Skip the prolog loop.  */
      if (skip_prolog)
        {
          guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                    niters_prolog, build_int_cst (type, 0));
          guard_bb = loop_preheader_edge (prolog)->src;
          guard_to = split_edge (loop_preheader_edge (loop));
          guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
                                           guard_to, guard_bb,
                                           inverse_probability (prob_prolog));
          e = EDGE_PRED (guard_to, 0);
          e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
          slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
          scale_loop_profile (prolog, prob_prolog, bound_prolog);
        }
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
        = fold_build2 (MINUS_EXPR, type,
                       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
      niters = vect_build_loop_niters (loop_vinfo);

      /* Prolog iterates at most bound_prolog times, latch iterates at
         most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }
1736 if (epilog_peeling)
1738 e = single_exit (loop);
1739 if (!slpeel_can_duplicate_loop_p (loop, e))
1741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1742 "loop can't be duplicated to exit edge.\n");
1743 gcc_unreachable ();
1745 /* Peel epilog and put it on exit edge of loop. */
1746 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1747 if (!epilog)
1749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1750 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1751 gcc_unreachable ();
1753 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1755 /* The scalar version loop may be preferred. In this case, add a guard
1756 and skip to the epilog. Note this only happens when the number of
1757 iterations of the loop is unknown at compile time, otherwise the loop
1758 would not have been vectorized at all. */
1759 if (skip_vector)
1761 /* An additional epilogue iteration is peeled if a gap exists. */
1762 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1763 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1764 bound_prolog,
1765 peel_for_gaps ? vf : vf - 1,
1766 th, &bound_scalar,
1767 check_profitability);
1768 /* Build guard against NITERSM1 since NITERS may overflow. */
1769 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1770 guard_bb = anchor;
1771 guard_to = split_edge (loop_preheader_edge (epilog));
1772 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1773 guard_to, guard_bb,
1774 inverse_probability (prob_vector));
1775 e = EDGE_PRED (guard_to, 0);
1776 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1777 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1778 scale_loop_profile (epilog, prob_vector, bound_scalar);
1781 tree niters_vector_mult_vf;
1782 /* If the loop was peeled a non-zero constant number of times, niters now
1783 refers to orig_niters - prolog_peeling; it cannot overflow even if
1784 orig_niters overflows. */
1785 niters_no_overflow |= (prolog_peeling > 0);
1786 vect_gen_vector_loop_niters (loop_vinfo, niters,
1787 niters_vector, niters_no_overflow);
1788 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1789 &niters_vector_mult_vf);
1790 /* Update IVs of original loop as if they were advanced by
1791 niters_vector_mult_vf steps. */
1792 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1793 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1794 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1795 update_e);
1797 if (skip_epilog)
1799 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1800 niters, niters_vector_mult_vf);
1801 guard_bb = single_exit (loop)->dest;
1802 guard_to = split_edge (single_exit (epilog));
1803 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1804 skip_vector ? anchor : guard_bb,
1805 inverse_probability (prob_epilog));
1806 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1807 single_exit (epilog));
1808 scale_loop_profile (epilog, prob_epilog, bound);
1810 else
1811 slpeel_update_phi_nodes_for_lcssa (epilog);
1813 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1814 /* We share the epilog loop with the scalar version loop. */
1815 bound = MAX (bound, bound_scalar - 1);
1816 record_niter_bound (epilog, bound, false, true);
1818 delete_update_ssa ();
1819 adjust_vec_debug_stmts ();
1820 scev_reset ();
1822 adjust_vec.release ();
1823 free_original_copy_tables ();
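
/* A minimal standalone sketch (illustrative only; peeling_shape_sketch,
   scalar_body and vector_body are hypothetical names, not GCC code) of
   the scalar control-flow shape vect_do_peeling builds: a prolog loop,
   the vector loop and an epilog loop, with vf the vectorization factor.
   The boundary checks below correspond to the skip_prolog, skip_vector
   and skip_epilog guards above; probability scaling, SSA and profile
   updates are omitted, and the epilog doubles as the scalar fallback.  */

static void
peeling_shape_sketch (unsigned long niters, unsigned long niters_prolog,
                      unsigned long vf,
                      void (*scalar_body) (unsigned long),
                      void (*vector_body) (unsigned long))
{
  unsigned long i = 0;
  /* Prolog loop: peel a few scalar iterations, e.g. until the data
     references are aligned (skipped when niters_prolog is zero).  */
  for (; i < niters_prolog; i++)
    scalar_body (i);
  /* Vector loop: runs while a full vector of iterations remains
     (skipped entirely when too few iterations are left).  */
  for (; i + vf <= niters; i += vf)
    vector_body (i);
  /* Epilog loop: the remaining tail iterations (jumped over when the
     vector loop consumed everything).  */
  for (; i < niters; i++)
    scalar_body (i);
}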
1826 /* Function vect_create_cond_for_niters_checks.
1828 Create a conditional expression that represents the run-time checks for
1829 the loop's niter. The loop is guaranteed to terminate if the run-time
1830 checks hold.
1832 Input:
1833 COND_EXPR - input conditional expression. New conditions will be chained
1834 with logical AND operation. If it is NULL, the niter assumptions
1835 become the resulting condition on their own.
1836 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1837 assumptions to be checked.
1839 Output:
1840 COND_EXPR - conditional expression.
1842 The returned COND_EXPR is the conditional expression to be used in the
1843 if statement that controls which version of the loop gets executed at
1844 runtime. */
1846 static void
1847 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1849 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1851 if (*cond_expr)
1852 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1853 *cond_expr, part_cond_expr);
1854 else
1855 *cond_expr = part_cond_expr;
1858 /* Function vect_create_cond_for_align_checks.
1860 Create a conditional expression that represents the alignment checks for
1861 all of the data references (array element references) whose alignment must be
1862 checked at runtime.
1864 Input:
1865 COND_EXPR - input conditional expression. New conditions will be chained
1866 with logical AND operation.
1867 LOOP_VINFO - two fields of the loop information are used.
1868 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1869 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1871 Output:
1872 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1873 expression.
1874 The returned value is the conditional expression to be used in the if
1875 statement that controls which version of the loop gets executed at runtime.
1877 The algorithm makes two assumptions:
1878 1) The number of bytes "n" in a vector is a power of 2.
1879 2) An address "a" is aligned if a%n is zero, and this
1880 test can be done as a&(n-1) == 0. For example, for 16
1881 byte vectors the test is a&0xf == 0. */
1883 static void
1884 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1885 tree *cond_expr,
1886 gimple_seq *cond_expr_stmt_list)
1888 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1889 vec<gimple *> may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1891 gimple *ref_stmt;
1892 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1893 tree mask_cst;
1894 unsigned int i;
1895 tree int_ptrsize_type;
1896 char tmp_name[20];
1897 tree or_tmp_name = NULL_TREE;
1898 tree and_tmp_name;
1899 gimple *and_stmt;
1900 tree ptrsize_zero;
1901 tree part_cond_expr;
1903 /* Check that mask is one less than a power of 2, i.e., mask is
1904 all zeros followed by all ones. */
1905 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1907 int_ptrsize_type = signed_type_for (ptr_type_node);
1909 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1910 of the first vector of the i'th data reference. */
1912 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1914 gimple_seq new_stmt_list = NULL;
1915 tree addr_base;
1916 tree addr_tmp_name;
1917 tree new_or_tmp_name;
1918 gimple *addr_stmt, *or_stmt;
1919 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1920 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1921 bool negative = tree_int_cst_compare
1922 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1923 tree offset = negative
1924 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1926 /* create: addr_tmp = (int)(address_of_first_vector) */
1927 addr_base =
1928 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1929 offset, loop);
1930 if (new_stmt_list != NULL)
1931 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1933 sprintf (tmp_name, "addr2int%d", i);
1934 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1935 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1936 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1938 /* The addresses are ORed together. */
1940 if (or_tmp_name != NULL_TREE)
1942 /* create: or_tmp = or_tmp | addr_tmp */
1943 sprintf (tmp_name, "orptrs%d", i);
1944 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1945 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1946 or_tmp_name, addr_tmp_name);
1947 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1948 or_tmp_name = new_or_tmp_name;
1950 else
1951 or_tmp_name = addr_tmp_name;
1953 } /* end for i */
1955 mask_cst = build_int_cst (int_ptrsize_type, mask);
1957 /* create: and_tmp = or_tmp & mask */
1958 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1960 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1961 or_tmp_name, mask_cst);
1962 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1964 /* Make and_tmp the left operand of the conditional test against zero.
1965 If and_tmp has a nonzero bit then some address is unaligned. */
1966 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1967 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1968 and_tmp_name, ptrsize_zero);
1969 if (*cond_expr)
1970 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1971 *cond_expr, part_cond_expr);
1972 else
1973 *cond_expr = part_cond_expr;
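
/* A standalone scalar sketch of the run-time test the statements built
   above perform (illustrative only; the helper name and its parameters
   are hypothetical, with addresses modelled as unsigned long).  All
   addresses are ORed together and then masked: the result is zero iff
   every address has its low bits clear, i.e. every address is aligned.  */

static int
all_addrs_aligned_sketch (const unsigned long *addrs, unsigned n_addrs,
                          unsigned long mask)
{
  unsigned long or_tmp = 0;
  for (unsigned i = 0; i < n_addrs; i++)
    or_tmp |= addrs[i];         /* the addr2int/orptrs chain  */
  return (or_tmp & mask) == 0;  /* the andmask test against zero  */
}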
1976 /* Given two data references and segment lengths described by DR_A and DR_B,
1977 create an expression checking whether the two address ranges intersect,
1978 based on the indices of the two accesses. This can only be done if
1979 DR_A and DR_B refer to the same (array) object and the index is the
1980 only difference. For example:
1982 DR_A DR_B
1983 data-ref arr[i] arr[j]
1984 base_object arr arr
1985 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1987 The addresses and their indices look like:
1989 |<- ADDR_A ->| |<- ADDR_B ->|
1990 ------------------------------------------------------->
1991 | | | | | | | | | |
1992 ------------------------------------------------------->
1993 i_0 ... i_0+4 j_0 ... j_0+4
1995 We can create an expression based on the index rather than the address:
1997 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1999 Note the evolution step of the index needs to be considered in the comparison. */
2001 static bool
2002 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2003 const dr_with_seg_len& dr_a,
2004 const dr_with_seg_len& dr_b)
2006 if (integer_zerop (DR_STEP (dr_a.dr))
2007 || integer_zerop (DR_STEP (dr_b.dr))
2008 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2009 return false;
2011 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2012 return false;
2014 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2015 return false;
2017 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2018 return false;
2020 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2021 return false;
2023 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2025 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2026 unsigned HOST_WIDE_INT abs_step
2027 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2029 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2030 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2031 /* Infer the number of iterations with which the memory segment is accessed
2032 by DR. In other words, aliasing is checked by testing whether the memory
2033 segment accessed by DR_A in some number of iterations intersects with the
2034 memory segment accessed by DR_B in the same number of iterations. Note
2035 the segment length is a linear function of the number of iterations with
2036 DR_STEP as the coefficient; e.g., a 16-byte segment with absolute step 4 covers (16 + 4 - 1) / 4 = 4 iterations. */
2037 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2038 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2040 unsigned int i;
2041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2042 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2044 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2045 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2046 /* The two indices must be the same if they are not SCEVs, or are not
2047 SCEVs with respect to the current loop being vectorized. */
2048 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2049 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2050 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2051 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2053 if (operand_equal_p (access1, access2, 0))
2054 continue;
2056 return false;
2058 /* The two indices must have the same step. */
2059 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2060 return false;
2062 tree idx_step = CHREC_RIGHT (access1);
2063 /* Index must have const step, otherwise DR_STEP won't be constant. */
2064 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2065 /* Index must evaluate in the same direction as DR. */
2066 gcc_assert (!neg_step
2067 || tree_int_cst_compare (idx_step, size_zero_node) < 0);
2069 tree min1 = CHREC_LEFT (access1);
2070 tree min2 = CHREC_LEFT (access2);
2071 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2072 return false;
2074 /* Ideally, aliasing could be checked against the loop's control IV, but
2075 we would need to prove a linear mapping between the control IV and the
2076 reference index. Although that should be true, we check against the
2077 (array) index of the data reference instead. Like the segment length,
2078 the index length is a linear function of the number of iterations with
2079 idx_step as the coefficient, i.e., niter_len * idx_step. */
2080 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2081 build_int_cst (TREE_TYPE (min1),
2082 niter_len1));
2083 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2084 build_int_cst (TREE_TYPE (min2),
2085 niter_len2));
2086 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2087 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2088 /* Adjust ranges for negative step. */
2089 if (neg_step)
2091 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2092 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2093 CHREC_LEFT (access1), idx_step);
2094 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2095 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2096 CHREC_LEFT (access2), idx_step);
2098 tree part_cond_expr
2099 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2100 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2101 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2102 if (*cond_expr)
2103 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2104 *cond_expr, part_cond_expr);
2105 else
2106 *cond_expr = part_cond_expr;
2108 return true;
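
/* An illustrative sketch (hypothetical helper, not GCC code) of the
   index-space disjointness test folded into PART_COND_EXPR above, for
   the positive-step case; the negative-step case first mirrors the
   ranges as done above.  min1/min2 are the CHREC_LEFT start indices,
   and each range spans niter_len * idx_step, so the accesses are
   independent iff one range ends at or before the other begins.  */

static int
index_ranges_disjoint_sketch (long min1, long min2, long idx_step,
                              long niter_len1, long niter_len2)
{
  long max1 = min1 + idx_step * niter_len1;
  long max2 = min2 + idx_step * niter_len2;
  return max1 <= min2 || max2 <= min1;  /* the two LE_EXPRs, ORed  */
}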
2111 /* Given two data references and segment lengths described by DR_A and DR_B,
2112 create an expression checking whether the two address ranges intersect
2113 with each other:
2115 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2116 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
2118 static void
2119 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2120 const dr_with_seg_len& dr_a,
2121 const dr_with_seg_len& dr_b)
2123 *cond_expr = NULL_TREE;
2124 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2125 return;
2127 tree segment_length_a = dr_a.seg_len;
2128 tree segment_length_b = dr_b.seg_len;
2129 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2130 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2131 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2133 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2134 offset_a, DR_INIT (dr_a.dr));
2135 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2136 offset_b, DR_INIT (dr_b.dr));
2137 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2138 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2140 tree seg_a_min = addr_base_a;
2141 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2142 /* For a negative step, we need to adjust the address range by
2143 TYPE_SIZE_UNIT bytes, e.g., for int accesses a[3] -> a[1] the range
2144 is [a+4, a+16) instead of [a, a+12). */
2145 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2147 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2148 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2149 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2152 tree seg_b_min = addr_base_b;
2153 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2154 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2156 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2157 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2158 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2160 *cond_expr
2161 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2162 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2163 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
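
/* An illustrative sketch of the address-based test built above
   (hypothetical helper; addresses modelled as unsigned long).  The
   segments are half-open byte ranges [min, max), so they do not
   intersect iff one ends at or before the other begins, which is
   exactly the TRUTH_OR_EXPR of the two LE_EXPRs.  */

static int
segments_disjoint_sketch (unsigned long a_min, unsigned long a_max,
                          unsigned long b_min, unsigned long b_max)
{
  return a_max <= b_min || b_max <= a_min;
}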
2166 /* Function vect_create_cond_for_alias_checks.
2168 Create a conditional expression that represents the run-time checks for
2169 overlapping of address ranges represented by a list of data reference
2170 relations passed as input.
2172 Input:
2173 COND_EXPR - input conditional expression. New conditions will be chained
2174 with logical AND operation. If it is NULL, the first alias check
2175 becomes the resulting condition on its own.
2176 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
2177 pairs to be checked.
2179 Output:
2180 COND_EXPR - conditional expression.
2182 The returned COND_EXPR is the conditional expression to be used in the if
2183 statement that controls which version of the loop gets executed at runtime.
2186 void
2187 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2189 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2190 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2191 tree part_cond_expr;
2193 if (comp_alias_ddrs.is_empty ())
2194 return;
2196 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2198 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2199 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2201 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_NOTE, vect_location,
2204 "create runtime check for data references ");
2205 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2206 dump_printf (MSG_NOTE, " and ");
2207 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2208 dump_printf (MSG_NOTE, "\n");
2211 /* Create a condition expression for each pair of data references. */
2212 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2213 if (*cond_expr)
2214 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2215 *cond_expr, part_cond_expr);
2216 else
2217 *cond_expr = part_cond_expr;
2220 if (dump_enabled_p ())
2221 dump_printf_loc (MSG_NOTE, vect_location,
2222 "created %u versioning for alias checks.\n",
2223 comp_alias_ddrs.length ());
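
/* A sketch of how the final run-time alias condition composes
   (illustrative only: the helper is hypothetical and, unlike the real
   code, tests every pair of segments rather than just the pairs
   recorded in LOOP_VINFO_COMP_ALIAS_DDRS).  The versioning condition
   is the logical AND over all tested pairs of the per-pair "segments
   do not intersect" checks.  */

static int
alias_checks_sketch (const unsigned long *seg_min,
                     const unsigned long *seg_max, unsigned n_segs)
{
  for (unsigned i = 0; i < n_segs; i++)
    for (unsigned j = i + 1; j < n_segs; j++)
      if (!(seg_max[i] <= seg_min[j] || seg_max[j] <= seg_min[i]))
        return 0;   /* A pair may overlap: run the scalar version.  */
  return 1;         /* All checks pass: the vector version is safe.  */
}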
2227 /* Function vect_loop_versioning.
2229 If the loop has data references that may or may not be aligned and/or
2230 has data reference relations whose independence was not proven, then
2231 two versions of the loop need to be generated, one which is vectorized
2232 and one which isn't. A test is then generated to control which of the
2233 loops is executed. The test checks for the alignment of all of the
2234 data references that may or may not be aligned. An additional
2235 sequence of runtime tests is generated for each pair of DDRs whose
2236 independence was not proven. The vectorized version of the loop is
2237 executed only if both the alias and alignment tests pass.
2239 The test generated to check which version of the loop is executed
2240 is modified to also check for profitability as indicated by the
2241 cost model threshold TH.
2243 The versioning precondition(s) are placed in *COND_EXPR and
2244 *COND_EXPR_STMT_LIST. */
2246 void
2247 vect_loop_versioning (loop_vec_info loop_vinfo,
2248 unsigned int th, bool check_profitability)
2250 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2251 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2252 basic_block condition_bb;
2253 gphi_iterator gsi;
2254 gimple_stmt_iterator cond_exp_gsi;
2255 basic_block merge_bb;
2256 basic_block new_exit_bb;
2257 edge new_exit_e, e;
2258 gphi *orig_phi, *new_phi;
2259 tree cond_expr = NULL_TREE;
2260 gimple_seq cond_expr_stmt_list = NULL;
2261 tree arg;
2262 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2263 gimple_seq gimplify_stmt_list = NULL;
2264 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2265 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2266 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2267 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2269 if (check_profitability)
2270 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2271 build_int_cst (TREE_TYPE (scalar_loop_iters),
2272 th));
2274 if (version_niter)
2275 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2277 if (cond_expr)
2278 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2279 is_gimple_condexpr, NULL_TREE);
2281 if (version_align)
2282 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2283 &cond_expr_stmt_list);
2285 if (version_alias)
2286 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2288 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2289 is_gimple_condexpr, NULL_TREE);
2290 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2292 initialize_original_copy_tables ();
2293 if (scalar_loop)
2295 edge scalar_e;
2296 basic_block preheader, scalar_preheader;
2298 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2299 scale LOOP's frequencies instead. */
2300 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2301 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2302 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2303 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2304 while we need to move it above LOOP's preheader. */
2305 e = loop_preheader_edge (loop);
2306 scalar_e = loop_preheader_edge (scalar_loop);
2307 gcc_assert (empty_block_p (e->src)
2308 && single_pred_p (e->src));
2309 gcc_assert (empty_block_p (scalar_e->src)
2310 && single_pred_p (scalar_e->src));
2311 gcc_assert (single_pred_p (condition_bb));
2312 preheader = e->src;
2313 scalar_preheader = scalar_e->src;
2314 scalar_e = find_edge (condition_bb, scalar_preheader);
2315 e = single_pred_edge (preheader);
2316 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2317 scalar_preheader);
2318 redirect_edge_and_branch_force (scalar_e, preheader);
2319 redirect_edge_and_branch_force (e, condition_bb);
2320 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2321 single_pred (condition_bb));
2322 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2323 single_pred (scalar_preheader));
2324 set_immediate_dominator (CDI_DOMINATORS, preheader,
2325 condition_bb);
2327 else
2328 nloop = loop_version (loop, cond_expr, &condition_bb,
2329 prob, prob, REG_BR_PROB_BASE - prob, true);
2331 if (version_niter)
2333 /* The versioned loop could be infinite; we need to clear the existing
2334 niter information, which was copied from the original loop. */
2335 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2336 vect_free_loop_info_assumptions (nloop);
2337 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2338 loop_constraint_set (loop, LOOP_C_INFINITE);
2341 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2342 && dump_enabled_p ())
2344 if (version_alias)
2345 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2346 "loop versioned for vectorization because of "
2347 "possible aliasing\n");
2348 if (version_align)
2349 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2350 "loop versioned for vectorization to enhance "
2351 "alignment\n");
2354 free_original_copy_tables ();
2356 /* Loop versioning violates an assumption we try to maintain during
2357 vectorization - that the loop exit block has a single predecessor.
2358 After versioning, the exit block of both loop versions is the same
2359 basic block (i.e. it has two predecessors). Just in order to simplify
2360 following transformations in the vectorizer, we fix this situation
2361 here by adding a new (empty) block on the exit-edge of the loop,
2362 with the proper loop-exit phis to maintain loop-closed-form.
2363 If loop versioning wasn't done from loop, but scalar_loop instead,
2364 merge_bb will already have just a single predecessor. */
2366 merge_bb = single_exit (loop)->dest;
2367 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2369 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2370 new_exit_bb = split_edge (single_exit (loop));
2371 new_exit_e = single_exit (loop);
2372 e = EDGE_SUCC (new_exit_bb, 0);
2374 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2376 tree new_res;
2377 orig_phi = gsi.phi ();
2378 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2379 new_phi = create_phi_node (new_res, new_exit_bb);
2380 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2381 add_phi_arg (new_phi, arg, new_exit_e,
2382 gimple_phi_arg_location_from_edge (orig_phi, e));
2383 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2387 /* End loop-exit-fixes after versioning. */
2389 if (cond_expr_stmt_list)
2391 cond_exp_gsi = gsi_last_bb (condition_bb);
2392 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2393 GSI_SAME_STMT);
2395 update_ssa (TODO_update_ssa);
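
/* An illustrative sketch of the code shape vect_loop_versioning
   produces (hypothetical predicates stand in for the profitability,
   niter-assumption, alignment and alias conditions chained above).  */

static void
versioning_shape_sketch (unsigned long niters, unsigned long th,
                         int align_ok, int no_alias,
                         void (*vector_loop) (void),
                         void (*scalar_loop) (void))
{
  /* The combined COND_EXPR guards the vectorized version; the branch
     is weighted with prob = 4 * REG_BR_PROB_BASE / 5 above.  */
  if (niters > th && align_ok && no_alias)
    vector_loop ();
  else
    scalar_loop ();
}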