1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src)
106 && (!rename_from_outer_loop || e->src != outer_loop->header))
107 continue;
108 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
109 gsi_next (&gsi))
110 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
115 struct adjust_info
117 tree from, to;
118 basic_block bb;
121 /* A stack of values to be adjusted in debug stmts. We have to
122 process them LIFO, so that the closest substitution applies. If we
123 processed them FIFO, without the stack, we might substitute uses
124 with a PHI DEF that would soon become non-dominant, and when we got
125 to the suitable one, it wouldn't have anything to substitute any
126 more. */
127 static vec<adjust_info, va_heap> adjust_vec;
129 /* Adjust any debug stmts that referenced AI->from values to use the
130 loop-closed AI->to, if the references are dominated by AI->bb and
131 not by the definition of AI->from. */
133 static void
134 adjust_debug_stmts_now (adjust_info *ai)
136 basic_block bbphi = ai->bb;
137 tree orig_def = ai->from;
138 tree new_def = ai->to;
139 imm_use_iterator imm_iter;
140 gimple *stmt;
141 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
143 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
145 /* Adjust any debug stmts that held onto non-loop-closed
146 references. */
147 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
149 use_operand_p use_p;
150 basic_block bbuse;
152 if (!is_gimple_debug (stmt))
153 continue;
155 gcc_assert (gimple_debug_bind_p (stmt));
157 bbuse = gimple_bb (stmt);
159 if ((bbuse == bbphi
160 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
161 && !(bbuse == bbdef
162 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
164 if (new_def)
165 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
166 SET_USE (use_p, new_def);
167 else
169 gimple_debug_bind_reset_value (stmt);
170 update_stmt (stmt);
176 /* Adjust debug stmts as scheduled before. */
178 static void
179 adjust_vec_debug_stmts (void)
181 if (!MAY_HAVE_DEBUG_STMTS)
182 return;
184 gcc_assert (adjust_vec.exists ());
186 while (!adjust_vec.is_empty ())
188 adjust_debug_stmts_now (&adjust_vec.last ());
189 adjust_vec.pop ();
193 /* Adjust any debug stmts that referenced FROM values to use the
194 loop-closed TO, if the references are dominated by BB and not by
195 the definition of FROM. If adjust_vec is non-NULL, adjustments
196 will be postponed until adjust_vec_debug_stmts is called. */
198 static void
199 adjust_debug_stmts (tree from, tree to, basic_block bb)
201 adjust_info ai;
203 if (MAY_HAVE_DEBUG_STMTS
204 && TREE_CODE (from) == SSA_NAME
205 && ! SSA_NAME_IS_DEFAULT_DEF (from)
206 && ! virtual_operand_p (from))
208 ai.from = from;
209 ai.to = to;
210 ai.bb = bb;
212 if (adjust_vec.exists ())
213 adjust_vec.safe_push (ai);
214 else
215 adjust_debug_stmts_now (&ai);
219 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
222 transformations. */
224 static void
225 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
227 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
229 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
231 if (MAY_HAVE_DEBUG_STMTS)
232 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
233 gimple_bb (update_phi));
236 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
237 that starts at zero, increases by one, and whose limit is NITERS.
239 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
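/* For example, a rough sketch of the generated form (hypothetical SSA names,
   assuming the exit edge is the "true" edge of the condition):

     ivtmp_1 = PHI <0 (preheader), ivtmp_2 (latch)>
     ... loop body ...
     ivtmp_2 = ivtmp_1 + 1;
     if (ivtmp_2 >= niters)
       goto exit;
     else
       goto latch;

   so the loop body executes exactly NITERS times.  */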
241 void
242 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
244 tree indx_before_incr, indx_after_incr;
245 gcond *cond_stmt;
246 gcond *orig_cond;
247 edge exit_edge = single_exit (loop);
248 gimple_stmt_iterator loop_cond_gsi;
249 gimple_stmt_iterator incr_gsi;
250 bool insert_after;
251 tree init = build_int_cst (TREE_TYPE (niters), 0);
252 tree step = build_int_cst (TREE_TYPE (niters), 1);
253 source_location loop_loc;
254 enum tree_code code;
256 orig_cond = get_loop_exit_condition (loop);
257 gcc_assert (orig_cond);
258 loop_cond_gsi = gsi_for_stmt (orig_cond);
260 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
261 create_iv (init, step, NULL_TREE, loop,
262 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
264 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
265 true, NULL_TREE, true,
266 GSI_SAME_STMT);
267 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
268 true, GSI_SAME_STMT);
270 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
271 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
272 NULL_TREE);
274 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
276 /* Remove old loop exit test: */
277 gsi_remove (&loop_cond_gsi, true);
278 free_stmt_vec_info (orig_cond);
280 loop_loc = find_loop_location (loop);
281 if (dump_enabled_p ())
283 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
284 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
285 LOCATION_LINE (loop_loc));
286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
288 loop->nb_iterations = niters;
291 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
292 For all PHI arguments in FROM->dest and TO->dest coming from those
293 edges, ensure that the current_def of each TO->dest PHI argument is
294 set to that of the corresponding FROM->dest argument. */
296 static void
297 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
299 gimple_stmt_iterator gsi_from, gsi_to;
301 for (gsi_from = gsi_start_phis (from->dest),
302 gsi_to = gsi_start_phis (to->dest);
303 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
305 gimple *from_phi = gsi_stmt (gsi_from);
306 gimple *to_phi = gsi_stmt (gsi_to);
307 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
308 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
309 if (virtual_operand_p (from_arg))
311 gsi_next (&gsi_from);
312 continue;
314 if (virtual_operand_p (to_arg))
316 gsi_next (&gsi_to);
317 continue;
319 if (TREE_CODE (from_arg) != SSA_NAME)
320 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
321 else
323 if (get_current_def (to_arg) == NULL_TREE)
324 set_current_def (to_arg, get_current_def (from_arg));
326 gsi_next (&gsi_from);
327 gsi_next (&gsi_to);
330 gphi *from_phi = get_virtual_phi (from->dest);
331 gphi *to_phi = get_virtual_phi (to->dest);
332 if (from_phi)
333 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
334 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
338 /* Given LOOP this function generates a new copy of it and puts it
339 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
340 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
341 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
342 entry or exit of LOOP. */
344 struct loop *
345 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
346 struct loop *scalar_loop, edge e)
348 struct loop *new_loop;
349 basic_block *new_bbs, *bbs, *pbbs;
350 bool at_exit;
351 bool was_imm_dom;
352 basic_block exit_dest;
353 edge exit, new_exit;
354 bool duplicate_outer_loop = false;
356 exit = single_exit (loop);
357 at_exit = (e == exit);
358 if (!at_exit && e != loop_preheader_edge (loop))
359 return NULL;
361 if (scalar_loop == NULL)
362 scalar_loop = loop;
364 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
365 pbbs = bbs + 1;
366 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
367 /* Allow duplication of outer loops. */
368 if (scalar_loop->inner)
369 duplicate_outer_loop = true;
370 /* Check whether duplication is possible. */
371 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
373 free (bbs);
374 return NULL;
377 /* Generate new loop structure. */
378 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
379 duplicate_subloops (scalar_loop, new_loop);
381 exit_dest = exit->dest;
382 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
383 exit_dest) == loop->header ?
384 true : false);
386 /* Also copy the pre-header, this avoids jumping through hoops to
387 duplicate the loop entry PHI arguments. Create an empty
388 pre-header unconditionally for this. */
389 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
390 edge entry_e = single_pred_edge (preheader);
391 bbs[0] = preheader;
392 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
394 exit = single_exit (scalar_loop);
395 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
396 &exit, 1, &new_exit, NULL,
397 at_exit ? loop->latch : e->src, true);
398 exit = single_exit (loop);
399 basic_block new_preheader = new_bbs[0];
401 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
403 if (scalar_loop != loop)
405 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
406 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
407 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
408 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
409 header) to have current_def set, so copy them over. */
410 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
411 exit);
412 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
414 EDGE_SUCC (loop->latch, 0));
417 if (at_exit) /* Add the loop copy at exit. */
419 if (scalar_loop != loop)
421 gphi_iterator gsi;
422 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
424 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
425 gsi_next (&gsi))
427 gphi *phi = gsi.phi ();
428 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
429 location_t orig_locus
430 = gimple_phi_arg_location_from_edge (phi, e);
432 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
435 redirect_edge_and_branch_force (e, new_preheader);
436 flush_pending_stmts (e);
437 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
438 if (was_imm_dom || duplicate_outer_loop)
439 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
441 /* And remove the unnecessary forwarder again. Keep the other
442 one so we have a proper pre-header for the loop at the exit edge. */
443 redirect_edge_pred (single_succ_edge (preheader),
444 single_pred (preheader));
445 delete_basic_block (preheader);
446 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
447 loop_preheader_edge (scalar_loop)->src);
449 else /* Add the copy at entry. */
451 if (scalar_loop != loop)
453 /* Remove the unnecessary forwarder of scalar_loop again. */
454 redirect_edge_pred (single_succ_edge (preheader),
455 single_pred (preheader));
456 delete_basic_block (preheader);
457 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
458 loop_preheader_edge (scalar_loop)->src);
459 preheader = split_edge (loop_preheader_edge (loop));
460 entry_e = single_pred_edge (preheader);
463 redirect_edge_and_branch_force (entry_e, new_preheader);
464 flush_pending_stmts (entry_e);
465 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
467 redirect_edge_and_branch_force (new_exit, preheader);
468 flush_pending_stmts (new_exit);
469 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
471 /* And remove the unnecessary forwarder again. Keep the other
472 one so we have a proper pre-header for the loop at the exit edge. */
473 redirect_edge_pred (single_succ_edge (new_preheader),
474 single_pred (new_preheader));
475 delete_basic_block (new_preheader);
476 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
477 loop_preheader_edge (new_loop)->src);
480 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
481 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
483 if (scalar_loop != loop)
485 /* Update new_loop->header PHIs, so that on the preheader
486 edge they are the ones from loop rather than scalar_loop. */
487 gphi_iterator gsi_orig, gsi_new;
488 edge orig_e = loop_preheader_edge (loop);
489 edge new_e = loop_preheader_edge (new_loop);
491 for (gsi_orig = gsi_start_phis (loop->header),
492 gsi_new = gsi_start_phis (new_loop->header);
493 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
494 gsi_next (&gsi_orig), gsi_next (&gsi_new))
496 gphi *orig_phi = gsi_orig.phi ();
497 gphi *new_phi = gsi_new.phi ();
498 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
499 location_t orig_locus
500 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
502 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
506 free (new_bbs);
507 free (bbs);
509 checking_verify_dominators (CDI_DOMINATORS);
511 return new_loop;
515 /* Given the condition expression COND, put it as the last statement of
516 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
517 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
518 skip the loop. PROBABILITY is the skip edge's probability. */
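/* As an illustration (not a literal dump of the generated IL): after the
   call, GUARD_BB ends in "if (COND)"; the new true edge (the returned skip
   edge) jumps to GUARD_TO with the given PROBABILITY, while the pre-existing
   successor edge becomes the false edge and keeps the remaining probability
   and count.  */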
520 static edge
521 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
522 basic_block guard_to, basic_block dom_bb,
523 int probability)
525 gimple_stmt_iterator gsi;
526 edge new_e, enter_e;
527 gcond *cond_stmt;
528 gimple_seq gimplify_stmt_list = NULL;
530 enter_e = EDGE_SUCC (guard_bb, 0);
531 enter_e->flags &= ~EDGE_FALLTHRU;
532 enter_e->flags |= EDGE_FALSE_VALUE;
533 gsi = gsi_last_bb (guard_bb);
535 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
536 NULL_TREE);
537 if (gimplify_stmt_list)
538 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
540 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
541 gsi = gsi_last_bb (guard_bb);
542 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
544 /* Add new edge to connect guard block to the merge/loop-exit block. */
545 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
547 new_e->count = guard_bb->count;
548 new_e->probability = probability;
549 new_e->count = apply_probability (enter_e->count, probability);
550 enter_e->count -= new_e->count;
551 enter_e->probability = inverse_probability (probability);
552 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
553 return new_e;
557 /* This function verifies that the following restrictions hold for LOOP:
558 (1) it consists of exactly 2 basic blocks (a header and an empty latch)
559 for an innermost loop, and of 5 basic blocks for an outer loop.
560 (2) it is single entry, single exit
561 (3) its exit condition is the last stmt in the header
562 (4) E is the entry/exit edge of LOOP. */
565 bool
566 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
568 edge exit_e = single_exit (loop);
569 edge entry_e = loop_preheader_edge (loop);
570 gcond *orig_cond = get_loop_exit_condition (loop);
571 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
572 unsigned int num_bb = loop->inner? 5 : 2;
574 /* All loops have an outer scope; the only case in which loop->outer is NULL
575 is for the function itself. */
576 if (!loop_outer (loop)
577 || loop->num_nodes != num_bb
578 || !empty_block_p (loop->latch)
579 || !single_exit (loop)
580 /* Verify that new loop exit condition can be trivially modified. */
581 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
582 || (e != exit_e && e != entry_e))
583 return false;
585 return true;
588 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
589 in the exit bb and rename all the uses after the loop. This simplifies
590 the *guard[12] routines, which assume loop closed SSA form for all PHIs
591 (but normally loop closed SSA form doesn't require virtual PHIs to be
592 in the same form). Doing this early simplifies checking which
593 uses should be renamed. */
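/* For example (hypothetical SSA names): if the loop header has a virtual
   PHI .MEM_1 = PHI <.MEM_0 (preheader), .MEM_2 (latch)> but the exit bb has
   none, we create .MEM_3 = PHI <.MEM_2 (exit)> in the exit bb and rewrite
   every use of .MEM_2 that is outside the loop to use .MEM_3.  */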
595 static void
596 create_lcssa_for_virtual_phi (struct loop *loop)
598 gphi_iterator gsi;
599 edge exit_e = single_exit (loop);
601 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
602 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
604 gphi *phi = gsi.phi ();
605 for (gsi = gsi_start_phis (exit_e->dest);
606 !gsi_end_p (gsi); gsi_next (&gsi))
607 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
608 break;
609 if (gsi_end_p (gsi))
611 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
612 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
613 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
614 imm_use_iterator imm_iter;
615 gimple *stmt;
616 use_operand_p use_p;
618 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
619 gimple_phi_set_result (new_phi, new_vop);
620 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
621 if (stmt != new_phi
622 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
623 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
624 SET_USE (use_p, new_vop);
626 break;
631 /* Function find_loop_location.
633 Extract the location of the loop in the source code.
634 If the loop is not well formed for vectorization, an estimated
635 location is calculated.
636 Return the loop location on success and UNKNOWN_LOCATION otherwise. */
638 source_location
639 find_loop_location (struct loop *loop)
641 gimple *stmt = NULL;
642 basic_block bb;
643 gimple_stmt_iterator si;
645 if (!loop)
646 return UNKNOWN_LOCATION;
648 stmt = get_loop_exit_condition (loop);
650 if (stmt
651 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
652 return gimple_location (stmt);
654 /* If we got here the loop is probably not "well formed";
655 try to estimate the loop location. */
657 if (!loop->header)
658 return UNKNOWN_LOCATION;
660 bb = loop->header;
662 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
664 stmt = gsi_stmt (si);
665 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
666 return gimple_location (stmt);
669 return UNKNOWN_LOCATION;
672 /* Return true if PHI defines an IV of the loop to be vectorized. */
674 static bool
675 iv_phi_p (gphi *phi)
677 if (virtual_operand_p (PHI_RESULT (phi)))
678 return false;
680 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
681 gcc_assert (stmt_info != NULL);
682 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
683 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
684 return false;
686 return true;
689 /* Function vect_can_advance_ivs_p
691 In case the number of iterations that LOOP iterates is unknown at compile
692 time, an epilog loop will be generated, and the loop induction variables
693 (IVs) will be "advanced" to the value they are supposed to take just before
694 the epilog loop. Here we check that the access function of the loop IVs
695 and the expression that represents the loop bound are simple enough.
696 These restrictions will be relaxed in the future. */
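/* For example (hypothetical SCEVs): an IV with access function {0, +, 4}_1
   has evolution part 4, which is loop invariant and not a chrec itself, so
   it can be advanced; an IV with access function {0, +, {1, +, 1}_1}_1
   (a degree-2 polynomial) is rejected.  */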
698 bool
699 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
701 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
702 basic_block bb = loop->header;
703 gphi_iterator gsi;
705 /* Analyze phi functions of the loop header. */
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
709 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
711 tree evolution_part;
713 gphi *phi = gsi.phi ();
714 if (dump_enabled_p ())
716 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
717 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
720 /* Skip virtual phi's. The data dependences that are associated with
721 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
723 Skip reduction phis. */
724 if (!iv_phi_p (phi))
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location,
728 "reduc or virtual phi. skip.\n");
729 continue;
732 /* Analyze the evolution function. */
734 evolution_part
735 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
736 if (evolution_part == NULL_TREE)
738 if (dump_enabled_p ())
739 dump_printf (MSG_MISSED_OPTIMIZATION,
740 "No access function or evolution.\n");
741 return false;
744 /* FORNOW: We do not transform initial conditions of IVs
745 whose evolution functions are not invariant in the loop. */
747 if (!expr_invariant_in_loop_p (loop, evolution_part))
749 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "evolution not invariant in loop.\n");
752 return false;
755 /* FORNOW: We do not transform initial conditions of IVs
756 whose evolution function is a polynomial of degree >= 2. */
758 if (tree_is_chrec (evolution_part))
760 if (dump_enabled_p ())
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
762 "evolution is chrec.\n");
763 return false;
767 return true;
771 /* Function vect_update_ivs_after_vectorizer.
773 "Advance" the induction variables of LOOP to the value they should take
774 after the execution of LOOP. This is currently necessary because the
775 vectorizer does not handle induction variables that are used after the
776 loop. Such a situation occurs when the last iterations of LOOP are
777 peeled, because:
778 1. We introduced new uses after LOOP for IVs that were not originally used
779 after LOOP: the IVs of LOOP are now used by an epilog loop.
780 2. LOOP is going to be vectorized; this means that it will iterate N/VF
781 times, whereas the loop IVs should be bumped N times.
783 Input:
784 - LOOP - a loop that is going to be vectorized. The last few iterations
785 of LOOP were peeled.
786 - NITERS - the number of iterations that LOOP executes (before it is
787 vectorized). i.e, the number of times the ivs should be bumped.
788 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
789 coming out from LOOP on which there are uses of the LOOP ivs
790 (this is the path from LOOP->exit to epilog_loop->preheader).
792 The new definitions of the ivs are placed in LOOP->exit.
793 The phi args associated with the edge UPDATE_E in the bb
794 UPDATE_E->dest are updated accordingly.
796 Assumption 1: Like the rest of the vectorizer, this function assumes
797 a single loop exit that has a single predecessor.
799 Assumption 2: The phi nodes in the LOOP header and in update_bb are
800 organized in the same order.
802 Assumption 3: The access function of the ivs is simple enough (see
803 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
805 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
806 coming out of LOOP on which the ivs of LOOP are used (this is the path
807 that leads to the epilog loop; other paths skip the epilog loop). This
808 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
809 needs to have its phis updated.
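/* For example (hypothetical values): an integer IV with initial value 0
   and step 4 is advanced to 0 + NITERS * 4 in the exit bb, and that value
   becomes the PHI argument on UPDATE_E in place of the non-advanced
   definition.  */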
812 static void
813 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
814 tree niters, edge update_e)
816 gphi_iterator gsi, gsi1;
817 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
818 basic_block update_bb = update_e->dest;
819 basic_block exit_bb = single_exit (loop)->dest;
821 /* Make sure there exists a single-predecessor exit bb: */
822 gcc_assert (single_pred_p (exit_bb));
823 gcc_assert (single_succ_edge (exit_bb) == update_e);
825 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
826 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
827 gsi_next (&gsi), gsi_next (&gsi1))
829 tree init_expr;
830 tree step_expr, off;
831 tree type;
832 tree var, ni, ni_name;
833 gimple_stmt_iterator last_gsi;
835 gphi *phi = gsi.phi ();
836 gphi *phi1 = gsi1.phi ();
837 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE, vect_location,
840 "vect_update_ivs_after_vectorizer: phi: ");
841 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
844 /* Skip reduction and virtual phis. */
845 if (!iv_phi_p (phi))
847 if (dump_enabled_p ())
848 dump_printf_loc (MSG_NOTE, vect_location,
849 "reduc or virtual phi. skip.\n");
850 continue;
853 type = TREE_TYPE (gimple_phi_result (phi));
854 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
855 step_expr = unshare_expr (step_expr);
857 /* FORNOW: We do not support IVs whose evolution function is a polynomial
858 of degree >= 2 or exponential. */
859 gcc_assert (!tree_is_chrec (step_expr));
861 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
863 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
864 fold_convert (TREE_TYPE (step_expr), niters),
865 step_expr);
866 if (POINTER_TYPE_P (type))
867 ni = fold_build_pointer_plus (init_expr, off);
868 else
869 ni = fold_build2 (PLUS_EXPR, type,
870 init_expr, fold_convert (type, off));
872 var = create_tmp_var (type, "tmp");
874 last_gsi = gsi_last_bb (exit_bb);
875 gimple_seq new_stmts = NULL;
876 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
877 /* Exit_bb shouldn't be empty. */
878 if (!gsi_end_p (last_gsi))
879 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
880 else
881 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
883 /* Fix phi expressions in the successor bb. */
884 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
888 /* Function vect_gen_prolog_loop_niters
890 Generate the number of iterations which should be peeled as prolog for the
891 loop represented by LOOP_VINFO. It is calculated as the misalignment of
892 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
893 As a result, after the execution of this loop, the data reference DR will
894 refer to an aligned location. The following computation is generated:
896 If the misalignment of DR is known at compile time:
897 addr_mis = DR_MISALIGNMENT (dr);
898 Else, compute address misalignment in bytes:
899 addr_mis = addr & (vectype_align - 1)
901 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
903 (elem_size = element type size; an element is the scalar element whose type
904 is the inner type of the vectype)
906 The computations will be emitted at the end of BB. We also compute and
907 store upper bound of the result in BOUND.
909 When the step of the data-ref in the loop is not 1 (as in interleaved data
910 and SLP), the number of iterations of the prolog must be divided by the step
911 (which is equal to the size of interleaved group).
913 The above formulas assume that VF == number of elements in the vector. This
914 may not hold when there are multiple-types in the loop.
915 In this case, for some data-references in the loop the VF does not represent
916 the number of elements that fit in the vector. Therefore, instead of VF we
917 use TYPE_VECTOR_SUBPARTS. */
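/* For example (hypothetical values): for a V4SI access (4 elements of
   4 bytes, vectype_align 16) whose address is misaligned by 8 bytes,
   elem_misalign = 8 >> 2 = 2 and prolog_niters = (4 - 2) & 3 = 2, i.e. two
   scalar iterations are peeled before the access becomes aligned
   (assuming step 1).  */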
919 static tree
920 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
921 basic_block bb, int *bound)
923 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
924 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
925 tree var;
926 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
927 gimple_seq stmts = NULL, new_stmts = NULL;
928 tree iters, iters_name;
929 gimple *dr_stmt = DR_STMT (dr);
930 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
932 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
933 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
935 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
937 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
939 if (dump_enabled_p ())
940 dump_printf_loc (MSG_NOTE, vect_location,
941 "known peeling = %d.\n", npeel);
943 iters = build_int_cst (niters_type, npeel);
944 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + 1;
946 else
948 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
949 tree offset = negative
950 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
951 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
952 &stmts, offset, loop);
953 tree type = unsigned_type_for (TREE_TYPE (start_addr));
954 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
955 HOST_WIDE_INT elem_size =
956 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
957 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
958 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
959 tree nelements_tree = build_int_cst (type, nelements);
960 tree byte_misalign;
961 tree elem_misalign;
963 /* Create: byte_misalign = addr & (vectype_align - 1) */
964 byte_misalign =
965 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
966 vectype_align_minus_1);
968 /* Create: elem_misalign = byte_misalign / element_size */
969 elem_misalign =
970 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
972 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
973 if (negative)
974 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
975 else
976 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
977 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
978 iters = fold_convert (niters_type, iters);
979 *bound = nelements;
982 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "niters for prolog loop: ");
986 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
987 dump_printf (MSG_NOTE, "\n");
990 var = create_tmp_var (niters_type, "prolog_loop_niters");
991 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
993 if (new_stmts)
994 gimple_seq_add_seq (&stmts, new_stmts);
995 if (stmts)
997 gcc_assert (single_succ_p (bb));
998 gimple_stmt_iterator gsi = gsi_last_bb (bb);
999 if (gsi_end_p (gsi))
1000 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1001 else
1002 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1004 return iters_name;
1008 /* Function vect_update_init_of_dr
1010 NITERS iterations were peeled from LOOP. DR represents a data reference
1011 in LOOP. This function updates the information recorded in DR to
1012 account for the fact that the first NITERS iterations had already been
1013 executed. Specifically, it updates the OFFSET field of DR. */
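/* For example (hypothetical values): for a data reference with DR_STEP 4
   (an int accessed once per iteration), peeling NITERS = 3 iterations adds
   3 * 4 = 12 bytes to DR_OFFSET, so the remaining loop starts at the fourth
   element.  */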
1015 static void
1016 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1018 tree offset = DR_OFFSET (dr);
1020 niters = fold_build2 (MULT_EXPR, sizetype,
1021 fold_convert (sizetype, niters),
1022 fold_convert (sizetype, DR_STEP (dr)));
1023 offset = fold_build2 (PLUS_EXPR, sizetype,
1024 fold_convert (sizetype, offset), niters);
1025 DR_OFFSET (dr) = offset;
1029 /* Function vect_update_inits_of_drs
1031 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1032 This function updates the information recorded for the data references in
1033 the loop to account for the fact that the first NITERS iterations had
1034 already been executed. Specifically, it updates the initial_condition of
1035 the access_function of all the data_references in the loop. */
1037 static void
1038 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1040 unsigned int i;
1041 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1042 struct data_reference *dr;
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE, vect_location,
1046 "=== vect_update_inits_of_dr ===\n");
1048 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1049 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1051 gimple_seq seq;
1052 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1053 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1055 niters = fold_convert (sizetype, niters);
1056 niters = force_gimple_operand (niters, &seq, false, var);
1057 if (seq)
1059 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1060 gcc_assert (!new_bb);
1064 FOR_EACH_VEC_ELT (datarefs, i, dr)
1065 vect_update_init_of_dr (dr, niters);
1069 /* This function builds ni_name = number of iterations. Statements
1070 are emitted on the loop preheader edge. */
1072 tree
1073 vect_build_loop_niters (loop_vec_info loop_vinfo)
1075 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1076 if (TREE_CODE (ni) == INTEGER_CST)
1077 return ni;
1078 else
1080 tree ni_name, var;
1081 gimple_seq stmts = NULL;
1082 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1084 var = create_tmp_var (TREE_TYPE (ni), "niters");
1085 ni_name = force_gimple_operand (ni, &stmts, false, var);
1086 if (stmts)
1087 gsi_insert_seq_on_edge_immediate (pe, stmts);
1089 return ni_name;
1093 /* Calculate the number of iterations under which the scalar loop will be
1094 preferred over the vectorized loop. NITERS_PROLOG is the number of
1095 iterations of the prolog loop. If it's an integer constant, the value
1096 is also passed in INT_NITERS_PROLOG. VF is the vector factor;
1097 TH is the threshold for the vectorized loop if CHECK_PROFITABILITY is
1098 true. This function also stores the upper bound of the result in BOUND. */
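/* For example (hypothetical values): with VF = 4, BOUND_PROLOG = 5 and
   CHECK_PROFITABILITY false, the returned expression is NITERS_PROLOG + 4
   and *BOUND is set to 9.  With CHECK_PROFITABILITY true, TH = 20 and a
   constant INT_NITERS_PROLOG = 3, the result is the constant 21 (the
   incremented threshold) and *BOUND = 21.  */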
1100 static tree
1101 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1102 int bound_prolog, int vf, int th, int *bound,
1103 bool check_profitability)
1105 tree type = TREE_TYPE (niters_prolog);
1106 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1107 build_int_cst (type, vf));
1109 *bound = vf + bound_prolog;
1110 if (check_profitability)
1112 th++;
1113 /* Peeling for constant times. */
1114 if (int_niters_prolog >= 0)
1116 *bound = (int_niters_prolog + vf < th
1117 ? th
1118 : vf + int_niters_prolog);
1119 return build_int_cst (type, *bound);
1121 /* Peeling for unknown times, in this case, prolog loop must
1122 execute less than bound_prolog times. */
1123 if (th >= vf + bound_prolog - 1)
1125 *bound = th;
1126 return build_int_cst (type, th);
1128 /* Need to do runtime comparison, but bound remains the same. */
1129 else if (th > vf)
1130 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1132 return niters;
1135 /* This function generates the following statements:
1137 niters = number of iterations loop executes (after peeling)
1138 niters_vector = niters / vf
1140 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1141 true if NITERS doesn't overflow. */
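/* For example (hypothetical values): with vf = 4 and niters = 10, the
   no-overflow form gives niters_vector = 10 >> 2 = 2 and the overflow-safe
   form gives ((10 - 4) >> 2) + 1 = 2 as well; the latter stays correct even
   if niters has wrapped around zero.  */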
1143 void
1144 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1145 tree *niters_vector_ptr, bool niters_no_overflow)
1147 tree ni_minus_gap, var;
1148 tree niters_vector;
1149 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1150 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1151 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1153 /* If epilogue loop is required because of data accesses with gaps, we
1154 subtract one iteration from the total number of iterations here for
1155 correct calculation of RATIO. */
1156 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1158 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1159 niters,
1160 build_one_cst (TREE_TYPE (niters)));
1161 if (!is_gimple_val (ni_minus_gap))
1163 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1164 gimple *stmts = NULL;
1165 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1166 true, var);
1167 gsi_insert_seq_on_edge_immediate (pe, stmts);
1170 else
1171 ni_minus_gap = niters;
1173 /* Create: niters >> log2(vf) */
1174 /* If it's known that niters == number of latch executions + 1 doesn't
1175 overflow, we can generate niters >> log2(vf); otherwise we generate
1176 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1177 will be at least one. */
1178 if (niters_no_overflow)
1179 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1180 ni_minus_gap, log_vf);
1181 else
1182 niters_vector
1183 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1184 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1185 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1186 ni_minus_gap,
1187 build_int_cst
1188 (TREE_TYPE (niters), vf)),
1189 log_vf),
1190 build_int_cst (TREE_TYPE (niters), 1));
1192 if (!is_gimple_val (niters_vector))
1194 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1195 gimple *stmts = NULL;
1196 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1197 gsi_insert_seq_on_edge_immediate (pe, stmts);
1199 *niters_vector_ptr = niters_vector;
1201 return;
1204 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1205 loop specified by LOOP_VINFO after vectorization, compute the number
1206 of iterations before vectorization (niters_vector * vf) and store it
1207 to NITERS_VECTOR_MULT_VF_PTR. */
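/* For example (hypothetical values): with vf = 4 and niters_vector = 24,
   niters_vector_mult_vf = 24 << 2 = 96, the number of scalar iterations
   covered by the vector loop.  */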
1209 static void
1210 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1211 tree niters_vector,
1212 tree *niters_vector_mult_vf_ptr)
1214 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 tree type = TREE_TYPE (niters_vector);
1217 tree log_vf = build_int_cst (type, exact_log2 (vf));
1218 basic_block exit_bb = single_exit (loop)->dest;
1220 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1221 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1222 niters_vector, log_vf);
1223 if (!is_gimple_val (niters_vector_mult_vf))
1225 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1226 gimple_seq stmts = NULL;
1227 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1228 &stmts, true, var);
1229 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1230 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1232 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1235 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1236 from SECOND/FIRST and puts it at the original loop's preheader/exit
1237 edge; the two loops are arranged as below:
1239 preheader_a:
1240 first_loop:
1241 header_a:
1242 i_1 = PHI<i_0, i_2>;
1244 i_2 = i_1 + 1;
1245 if (cond_a)
1246 goto latch_a;
1247 else
1248 goto between_bb;
1249 latch_a:
1250 goto header_a;
1252 between_bb:
1253 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1255 second_loop:
1256 header_b:
1257 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1258 or with i_2 if no LCSSA phi is created
1259 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1261 i_4 = i_3 + 1;
1262 if (cond_b)
1263 goto latch_b;
1264 else
1265 goto exit_bb;
1266 latch_b:
1267 goto header_b;
1269 exit_bb:
1271 This function creates loop closed SSA for the first loop; it updates the
1272 second loop's PHI nodes by replacing the arguments on its incoming edge
1273 with the results of the newly created lcssa PHI nodes. If
1274 CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
1275 created for non-iv phis of the first loop.
1277 This function assumes the exit bb of the first loop is the preheader bb of
1278 the second loop, i.e., between_bb in the example code. With the PHIs
1279 updated, the second loop will execute the remaining iterations of the first. */
1281 static void
1282 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1283 struct loop *first, struct loop *second,
1284 bool create_lcssa_for_iv_phis)
1286 gphi_iterator gsi_update, gsi_orig;
1287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1289 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1290 edge second_preheader_e = loop_preheader_edge (second);
1291 basic_block between_bb = single_exit (first)->dest;
1293 gcc_assert (between_bb == second_preheader_e->src);
1294 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1295 /* Either the first loop or the second is the loop to be vectorized. */
1296 gcc_assert (loop == first || loop == second);
1298 for (gsi_orig = gsi_start_phis (first->header),
1299 gsi_update = gsi_start_phis (second->header);
1300 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1301 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1303 gphi *orig_phi = gsi_orig.phi ();
1304 gphi *update_phi = gsi_update.phi ();
1306 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1307 /* Generate lcssa PHI node for the first loop. */
1308 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1309 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1311 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1312 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1313 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1314 arg = new_res;
1317 /* Update PHI node in the second loop by replacing arg on the loop's
1318 incoming edge. */
1319 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1323 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1324 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1325 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1326 appear like below:
1328 guard_bb:
1329 if (cond)
1330 goto merge_bb;
1331 else
1332 goto skip_loop;
1334 skip_loop:
1335 header_a:
1336 i_1 = PHI<i_0, i_2>;
1338 i_2 = i_1 + 1;
1339 if (cond_a)
1340 goto latch_a;
1341 else
1342 goto exit_a;
1343 latch_a:
1344 goto header_a;
1346 exit_a:
1347 i_5 = PHI<i_2>;
1349 merge_bb:
1350 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1352 update_loop:
1353 header_b:
1354 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1356 i_4 = i_3 + 1;
1357 if (cond_b)
1358 goto latch_b;
1359 else
1360 goto exit_bb;
1361 latch_b:
1362 goto header_b;
1364 exit_bb:
1366 This function creates PHI nodes at merge_bb and replaces the use of i_5
1367 in the update_loop's PHI node with the newly created PHI result. */
1369 static void
1370 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1371 struct loop *update_loop,
1372 edge guard_edge, edge merge_edge)
1374 source_location merge_loc, guard_loc;
1375 edge orig_e = loop_preheader_edge (skip_loop);
1376 edge update_e = loop_preheader_edge (update_loop);
1377 gphi_iterator gsi_orig, gsi_update;
1379 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1380 gsi_update = gsi_start_phis (update_loop->header));
1381 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1382 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1384 gphi *orig_phi = gsi_orig.phi ();
1385 gphi *update_phi = gsi_update.phi ();
1387 /* Generate new phi node at merge bb of the guard. */
1388 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1389 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1391 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1392 args in NEW_PHI for these edges. */
1393 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1394 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1395 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1396 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1397 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1398 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1400 /* Update phi in UPDATE_PHI. */
1401 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1405 /* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP;
1406 this function searches for the corresponding lcssa phi node in the exit
1407 bb of LOOP. If it is found, return the phi result; otherwise return
1408 NULL. */
1410 static tree
1411 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1412 gphi *lcssa_phi)
1414 gphi_iterator gsi;
1415 edge e = single_exit (loop);
1417 gcc_assert (single_pred_p (e->dest));
1418 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1420 gphi *phi = gsi.phi ();
1421 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1422 PHI_ARG_DEF (lcssa_phi, 0), 0))
1423 return PHI_RESULT (phi);
1425 return NULL_TREE;
1428 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1429 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1430 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1431 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1432 The CFG looks like:
1434 loop:
1435 header_a:
1436 i_1 = PHI<i_0, i_2>;
1438 i_2 = i_1 + 1;
1439 if (cond_a)
1440 goto latch_a;
1441 else
1442 goto exit_a;
1443 latch_a:
1444 goto header_a;
1446 exit_a:
1448 guard_bb:
1449 if (cond)
1450 goto merge_bb;
1451 else
1452 goto epilog_loop;
1454 ;; fall_through_bb
1456 epilog_loop:
1457 header_b:
1458 i_3 = PHI<i_2, i_4>;
1460 i_4 = i_3 + 1;
1461 if (cond_b)
1462 goto latch_b;
1463 else
1464 goto merge_bb;
1465 latch_b:
1466 goto header_b;
1468 merge_bb:
1469 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1471 exit_bb:
1472 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1474 For each name used outside EPILOG (i.e. for each name that has an lcssa
1475 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1476 args corresponding to GUARD_EDGE and MERGE_EDGE. The arg for MERGE_EDGE
1477 is the arg of the original PHI in exit_bb; the arg for GUARD_EDGE is
1478 defined by LOOP and is found in the exit bb of LOOP. The arg of the
1479 original PHI in exit_bb will also be updated. */
1481 static void
1482 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1483 edge guard_edge, edge merge_edge)
1485 gphi_iterator gsi;
1486 basic_block merge_bb = guard_edge->dest;
1488 gcc_assert (single_succ_p (merge_bb));
1489 edge e = single_succ_edge (merge_bb);
1490 basic_block exit_bb = e->dest;
1491 gcc_assert (single_pred_p (exit_bb));
1492 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1494 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1496 gphi *update_phi = gsi.phi ();
1497 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1498 /* This loop-closed-phi actually doesn't represent a use out of the
1499 loop - the phi arg is a constant. */
1500 if (TREE_CODE (old_arg) != SSA_NAME)
1501 continue;
1503 tree merge_arg = get_current_def (old_arg);
1504 if (!merge_arg)
1505 merge_arg = old_arg;
1507 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1508 /* If the var is live after loop but not a reduction, we simply
1509 use the old arg. */
1510 if (!guard_arg)
1511 guard_arg = old_arg;
1513 /* Create new phi node in MERGE_BB: */
1514 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1515 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1517 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1518 the two PHI args in merge_phi for these edges. */
1519 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1520 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1522 /* Update the original phi in exit_bb. */
1523 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1527 /* The EPILOG loop is duplicated from the original loop for vectorizing;
1528 the arg of its loop closed ssa PHI needs to be updated. */
1530 static void
1531 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1533 gphi_iterator gsi;
1534 basic_block exit_bb = single_exit (epilog)->dest;
1536 gcc_assert (single_pred_p (exit_bb));
1537 edge e = EDGE_PRED (exit_bb, 0);
1538 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1539 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1542 /* Function vect_do_peeling.
1544 Input:
1545 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1547 preheader:
1548 LOOP:
1549 header_bb:
1550 loop_body
1551 if (exit_loop_cond) goto exit_bb
1552 else goto header_bb
1553 exit_bb:
1555 - NITERS: The number of iterations of the loop.
1556 - NITERSM1: The number of iterations of the loop's latch.
1557 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1558 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1559 CHECK_PROFITABILITY is true.
1560 Output:
1561 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1563 This function peels prolog and epilog from the loop, adds guards skipping
1564 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1565 would look like:
1567 guard_bb_1:
1568 if (prefer_scalar_loop) goto merge_bb_1
1569 else goto guard_bb_2
1571 guard_bb_2:
1572 if (skip_prolog) goto merge_bb_2
1573 else goto prolog_preheader
1575 prolog_preheader:
1576 PROLOG:
1577 prolog_header_bb:
1578 prolog_body
1579 if (exit_prolog_cond) goto prolog_exit_bb
1580 else goto prolog_header_bb
1581 prolog_exit_bb:
1583 merge_bb_2:
1585 vector_preheader:
1586 VECTOR LOOP:
1587 vector_header_bb:
1588 vector_body
1589 if (exit_vector_cond) goto vector_exit_bb
1590 else goto vector_header_bb
1591 vector_exit_bb:
1593 guard_bb_3:
1594 if (skip_epilog) goto merge_bb_3
1595 else goto epilog_preheader
1597 merge_bb_1:
1599 epilog_preheader:
1600 EPILOG:
1601 epilog_header_bb:
1602 epilog_body
1603 if (exit_epilog_cond) goto merge_bb_3
1604 else goto epilog_header_bb
1606 merge_bb_3:
1608 Note this function peels the prolog and epilog and adds the guards
1609 only when they are necessary.
1611 TODO: Guard for prefer_scalar_loop should be emitted along with
1612 versioning conditions if loop versioning is needed. */
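/* For example (hypothetical values): with VF = 4, a known NITERS = 100 and
   3 prolog iterations peeled for alignment, the prolog runs 3 iterations,
   the vector loop executes (100 - 3) >> 2 = 24 iterations covering 96
   scalar iterations, and the epilog handles the single remaining
   iteration.  */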
1614 void
1615 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1616 tree *niters_vector, int th, bool check_profitability,
1617 bool niters_no_overflow)
1619 edge e, guard_e;
1620 tree type = TREE_TYPE (niters), guard_cond;
1621 basic_block guard_bb, guard_to;
1622 int prob_prolog, prob_vector, prob_epilog;
1623 int bound_prolog = 0, bound_epilog = 0, bound = 0;
1624 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1625 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1626 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1627 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1629 if (!prolog_peeling && !epilog_peeling)
1630 return;
1632 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1633 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1634 vf = 3;
1635 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1636 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1638 struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1639 struct loop *first_loop = loop;
1640 create_lcssa_for_virtual_phi (loop);
1641 update_ssa (TODO_update_ssa_only_virtuals);
1643 if (MAY_HAVE_DEBUG_STMTS)
1645 gcc_assert (!adjust_vec.exists ());
1646 adjust_vec.create (32);
1648 initialize_original_copy_tables ();
1650 /* Prolog loop may be skipped. */
1651 bool skip_prolog = (prolog_peeling != 0);
1652 /* Skip to epilog if scalar loop may be preferred. It's only used when
1653 we peel for epilog loop. */
1654 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1655 /* Epilog loop must be executed if the number of iterations for epilog
1656 loop is known at compile time, otherwise we need to add a check at
1657 the end of vector loop and skip to the end of epilog loop. */
1658 bool skip_epilog = (prolog_peeling < 0
1659 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1660 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1661 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1662 skip_epilog = false;
1664 /* Record the anchor bb at which guard should be placed if scalar loop
1665 may be preferred. */
1666 basic_block anchor = loop_preheader_edge (loop)->src;
1667 if (skip_vector)
1668 split_edge (loop_preheader_edge (loop));
1670 tree niters_prolog = build_int_cst (type, 0);
1671 source_location loop_loc = find_loop_location (loop);
1672 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1673 if (prolog_peeling)
1675 e = loop_preheader_edge (loop);
1676 if (!slpeel_can_duplicate_loop_p (loop, e))
1678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1679 "loop can't be duplicated to preheader edge.\n");
1680 gcc_unreachable ();
1682 /* Peel prolog and put it on preheader edge of loop. */
1683 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1684 if (!prolog)
1686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1687 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1688 gcc_unreachable ();
1690 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1691 first_loop = prolog;
1692 reset_original_copy_tables ();
1694 /* Generate and update the number of iterations for prolog loop. */
1695 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1696 &bound_prolog);
1697 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1699 /* Skip the prolog loop. */
1700 if (skip_prolog)
1702 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1703 niters_prolog, build_int_cst (type, 0));
1704 guard_bb = loop_preheader_edge (prolog)->src;
1705 guard_to = split_edge (loop_preheader_edge (loop));
1706 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1707 guard_to, guard_bb,
1708 inverse_probability (prob_prolog));
1709 e = EDGE_PRED (guard_to, 0);
1710 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1711 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1712 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1714 /* Update init address of DRs. */
1715 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1716 /* Update niters for vector loop. */
1717 LOOP_VINFO_NITERS (loop_vinfo)
1718 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1719 LOOP_VINFO_NITERSM1 (loop_vinfo)
1720 = fold_build2 (MINUS_EXPR, type,
1721 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1722 niters = vect_build_loop_niters (loop_vinfo);
1724 /* Prolog iterates at most bound_prolog - 1 times, latch iterates
1725 at most bound_prolog - 2 times. */
1726 record_niter_bound (prolog, bound_prolog - 2, false, true);
1727 delete_update_ssa ();
1728 adjust_vec_debug_stmts ();
1729 scev_reset ();
1732 if (epilog_peeling)
1734 e = single_exit (loop);
1735 if (!slpeel_can_duplicate_loop_p (loop, e))
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1738 "loop can't be duplicated to exit edge.\n");
1739 gcc_unreachable ();
1741 /* Peel epilog and put it on exit edge of loop. */
1742 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1743 if (!epilog)
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1746 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1747 gcc_unreachable ();
1749 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1751 /* The scalar version of the loop may be preferred. In this case, add
1752 a guard and skip to the epilog. Note this only happens when the number
1753 of iterations of the loop is unknown at compile time, otherwise this
1754 won't be vectorized. */
1755 if (skip_vector)
1757 /* The guard condition is based on NITERSM1 because NITERS might
1758 overflow, so what is generated here is niters_scalar - 1. In
1759 other words, both niters_scalar and bound_epilog refer to the
1760 scalar loop's latch. */
1761 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1762 bound_prolog, vf - 1, th - 1,
1763 &bound_epilog,
1764 check_profitability);
1765 guard_cond = fold_build2 (LT_EXPR, boolean_type_node,
1766 nitersm1, t);
1767 guard_bb = anchor;
1768 guard_to = split_edge (loop_preheader_edge (epilog));
1769 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1770 guard_to, guard_bb,
1771 inverse_probability (prob_vector));
1772 e = EDGE_PRED (guard_to, 0);
1773 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1774 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1775 scale_loop_profile (epilog, prob_vector, bound_epilog);
1778 tree niters_vector_mult_vf;
1779 /* If the loop is peeled a non-zero constant number of times, niters now
1780 refers to orig_niters - prolog_peeling, which won't overflow even if
1781 orig_niters overflows. */
1782 niters_no_overflow |= (prolog_peeling > 0);
1783 vect_gen_vector_loop_niters (loop_vinfo, niters,
1784 niters_vector, niters_no_overflow);
1785 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1786 &niters_vector_mult_vf);
1787 /* Update IVs of original loop as if they were advanced by
1788 niters_vector_mult_vf steps. */
1789 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1790 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1791 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1792 update_e);
1794 if (skip_epilog)
1796 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1797 niters, niters_vector_mult_vf);
1798 guard_bb = single_exit (loop)->dest;
1799 guard_to = split_edge (single_exit (epilog));
1800 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1801 skip_vector ? anchor : guard_bb,
1802 inverse_probability (prob_epilog));
1803 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1804 single_exit (epilog));
1805 scale_loop_profile (epilog, prob_epilog, bound);
1807 else
1808 slpeel_update_phi_nodes_for_lcssa (epilog);
1810 bound = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf * 2 : vf) - 2;
1811 /* The epilog loop is shared with the scalar version of the loop. */
1812 bound_epilog = MAX (bound, bound_epilog - 1);
1813 record_niter_bound (epilog, bound_epilog, false, true);
1815 delete_update_ssa ();
1816 adjust_vec_debug_stmts ();
1817 scev_reset ();
1819 adjust_vec.release ();
1820 free_original_copy_tables ();
1823 /* Function vect_create_cond_for_niters_checks.
1825 Create a conditional expression that represents the run-time checks for
1826 the loop's niter. The loop is guaranteed to terminate if the run-time
1827 checks hold.
1829 Input:
1830 COND_EXPR - input conditional expression. New conditions will be chained
1831 with a logical AND operation.
1832 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
1833 assumptions on the number of iterations that must
1834 hold at run time.
1836 Output:
1837 COND_EXPR - conditional expression.
1839 The returned COND_EXPR is the conditional expression to be used in the
1840 if statement that controls which version of the loop gets executed at
1841 runtime. */
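/* Illustrative shape of the result (an assumed example, not emitted
   verbatim by this function): if the recorded niter assumption is
   "n > 0" and *COND_EXPR already holds the profitability test
   "n > th", the chained condition becomes

     n > th && n > 0

   and the vectorized version runs only when it evaluates to true.  */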
1843 static void
1844 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1846 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1848 if (*cond_expr)
1849 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1850 *cond_expr, part_cond_expr);
1851 else
1852 *cond_expr = part_cond_expr;
1855 /* Function vect_create_cond_for_align_checks.
1857 Create a conditional expression that represents the alignment checks for
1858 all of the data references (array element references) whose alignment must be
1859 checked at runtime.
1861 Input:
1862 COND_EXPR - input conditional expression. New conditions will be chained
1863 with logical AND operation.
1864 LOOP_VINFO - two fields of the loop information are used.
1865 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1866 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1868 Output:
1869 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1870 expression.
1871 The returned value is the conditional expression to be used in the if
1872 statement that controls which version of the loop gets executed at runtime.
1874 The algorithm makes two assumptions:
1875 1) The number of bytes "n" in a vector is a power of 2.
1876 2) An address "a" is aligned if a%n is zero, so this
1877 test can be done as a&(n-1) == 0. For example, for 16
1878 byte vectors the test is a&0xf == 0. */
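/* A sketch of the generated sequence (variable names follow the
   temporaries created below; the two addresses and the 16-byte mask are
   assumptions for illustration only):

     addr2int0 = (signed-ptrsize) &a[i];
     addr2int1 = (signed-ptrsize) &b[i];
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     cond      = (andmask == 0);

   i.e. the vector loop is entered only when every checked base address
   is 16-byte aligned.  */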
1880 static void
1881 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1882 tree *cond_expr,
1883 gimple_seq *cond_expr_stmt_list)
1885 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1886 vec<gimple *> may_misalign_stmts
1887 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1888 gimple *ref_stmt;
1889 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1890 tree mask_cst;
1891 unsigned int i;
1892 tree int_ptrsize_type;
1893 char tmp_name[20];
1894 tree or_tmp_name = NULL_TREE;
1895 tree and_tmp_name;
1896 gimple *and_stmt;
1897 tree ptrsize_zero;
1898 tree part_cond_expr;
1900 /* Check that mask is one less than a power of 2, i.e., mask is
1901 all zeros followed by all ones. */
1902 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1904 int_ptrsize_type = signed_type_for (ptr_type_node);
1906 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1907 of the first vector of the i'th data reference. */
1909 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1911 gimple_seq new_stmt_list = NULL;
1912 tree addr_base;
1913 tree addr_tmp_name;
1914 tree new_or_tmp_name;
1915 gimple *addr_stmt, *or_stmt;
1916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1917 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1918 bool negative = tree_int_cst_compare
1919 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1920 tree offset = negative
1921 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1923 /* create: addr_tmp = (int)(address_of_first_vector) */
1924 addr_base =
1925 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1926 offset, loop);
1927 if (new_stmt_list != NULL)
1928 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1930 sprintf (tmp_name, "addr2int%d", i);
1931 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1932 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1933 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1935 /* The addresses are ORed together. */
1937 if (or_tmp_name != NULL_TREE)
1939 /* create: or_tmp = or_tmp | addr_tmp */
1940 sprintf (tmp_name, "orptrs%d", i);
1941 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1942 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1943 or_tmp_name, addr_tmp_name);
1944 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1945 or_tmp_name = new_or_tmp_name;
1947 else
1948 or_tmp_name = addr_tmp_name;
1950 } /* end for i */
1952 mask_cst = build_int_cst (int_ptrsize_type, mask);
1954 /* create: and_tmp = or_tmp & mask */
1955 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1957 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1958 or_tmp_name, mask_cst);
1959 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1961 /* Make and_tmp the left operand of the conditional test against zero.
1962 If and_tmp has a nonzero bit then some address is unaligned. */
1963 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1964 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1965 and_tmp_name, ptrsize_zero);
1966 if (*cond_expr)
1967 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1968 *cond_expr, part_cond_expr);
1969 else
1970 *cond_expr = part_cond_expr;
1973 /* Given two data references and segment lengths described by DR_A and DR_B,
1974 create an expression checking if the two address ranges intersect with
1975 each other based on the index of the two addresses. This can only be done
1976 if DR_A and DR_B refer to the same (array) object and the index is
1977 the only difference. For example:
1979 DR_A DR_B
1980 data-ref arr[i] arr[j]
1981 base_object arr arr
1982 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1984 The addresses and their indexes look like:
1986 |<- ADDR_A ->| |<- ADDR_B ->|
1987 ------------------------------------------------------->
1988 | | | | | | | | | |
1989 ------------------------------------------------------->
1990 i_0 ... i_0+4 j_0 ... j_0+4
1992 We can create expression based on index rather than address:
1994 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1996 Note the evolution step of the index needs to be considered in the comparison. */
1998 static bool
1999 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2000 const dr_with_seg_len& dr_a,
2001 const dr_with_seg_len& dr_b)
2003 if (integer_zerop (DR_STEP (dr_a.dr))
2004 || integer_zerop (DR_STEP (dr_b.dr))
2005 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2006 return false;
2008 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2009 return false;
2011 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2012 return false;
2014 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2015 return false;
2017 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2018 return false;
2020 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2022 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2023 unsigned HOST_WIDE_INT abs_step
2024 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2026 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2027 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2028 /* Infer the number of iterations with which the memory segment is accessed
2029 by DR. In other words, aliasing is checked if the memory segment accessed
2030 by DR_A in some iterations intersects with the memory segment accessed by
2031 DR_B in the same number of iterations.
2032 Note the segment length is a linear function of the number of iterations
2033 with DR_STEP as the coefficient. */
2034 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2035 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
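  /* Worked example with assumed values (not taken from any particular
     caller): for seg_len1 == 16 bytes and abs_step == 4 bytes,
     niter_len1 == (16 + 4 - 1) / 4 == 4, i.e. the segment spans four
     iterations of the index.  */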
2037 unsigned int i;
2038 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2039 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2041 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2042 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2043 /* The two indices must be the same if they are not SCEVs, or not SCEVs
2044 with respect to the current loop being vectorized. */
2045 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2046 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2047 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2048 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2050 if (operand_equal_p (access1, access2, 0))
2051 continue;
2053 return false;
2055 /* The two indices must have the same step. */
2056 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2057 return false;
2059 tree idx_step = CHREC_RIGHT (access1);
2060 /* Index must have const step, otherwise DR_STEP won't be constant. */
2061 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2062 /* Index must evaluate in the same direction as DR. */
2063 gcc_assert (!neg_step
2064 || tree_int_cst_compare (idx_step, size_zero_node) < 0);
2066 tree min1 = CHREC_LEFT (access1);
2067 tree min2 = CHREC_LEFT (access2);
2068 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2069 return false;
2071 /* Ideally, aliasing could be checked against the loop's control IV, but
2072 we would need to prove a linear mapping between the control IV and the
2073 reference index. Although that should be true, we instead check against
2074 the (array) index of the data reference. Like the segment length, the
2075 index length is a linear function of the number of iterations with
2076 idx_step as the coefficient, i.e., niter_len * idx_step. */
2077 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2078 build_int_cst (TREE_TYPE (min1),
2079 niter_len1));
2080 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2081 build_int_cst (TREE_TYPE (min2),
2082 niter_len2));
2083 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2084 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2085 /* Adjust ranges for negative step. */
2086 if (neg_step)
2088 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2089 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2090 CHREC_LEFT (access1), idx_step);
2091 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2092 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2093 CHREC_LEFT (access2), idx_step);
2095 tree part_cond_expr
2096 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2097 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2098 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2099 if (*cond_expr)
2100 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2101 *cond_expr, part_cond_expr);
2102 else
2103 *cond_expr = part_cond_expr;
2105 return true;
2108 /* Given two data references and segment lengths described by DR_A and DR_B,
2109 create an expression checking if the two address ranges intersect with
2110 each other:
2112 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2113 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
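/* A sketch with assumed operands: for DR_A based at &a[0] with segment
   length 16 and DR_B based at &b[0] with segment length 16 (both with a
   positive step), the emitted condition is morally

     (&a[0] + 16 <= &b[0]) || (&b[0] + 16 <= &a[0])

   i.e. the byte ranges [a, a+16) and [b, b+16) must not overlap.  */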
2115 static void
2116 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2117 const dr_with_seg_len& dr_a,
2118 const dr_with_seg_len& dr_b)
2120 *cond_expr = NULL_TREE;
2121 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2122 return;
2124 tree segment_length_a = dr_a.seg_len;
2125 tree segment_length_b = dr_b.seg_len;
2126 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2127 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2128 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2130 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2131 offset_a, DR_INIT (dr_a.dr));
2132 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2133 offset_b, DR_INIT (dr_b.dr));
2134 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2135 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2137 tree seg_a_min = addr_base_a;
2138 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2139 /* For a negative step, we need to adjust the address range by TYPE_SIZE_UNIT
2140 bytes, e.g., an int access running from a[3] down to a[1] covers the
2141 range [a+4, a+16) instead of [a, a+12). */
2142 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2144 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2145 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2146 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2149 tree seg_b_min = addr_base_b;
2150 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2151 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2153 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2154 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2155 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2157 *cond_expr
2158 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2159 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2160 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2163 /* Function vect_create_cond_for_alias_checks.
2165 Create a conditional expression that represents the run-time checks for
2166 overlapping of address ranges represented by a list of data reference
2167 relations passed as input.
2169 Input:
2170 COND_EXPR - input conditional expression. New conditions will be chained
2171 with a logical AND operation. If it is NULL, the first check
2172 is stored into it directly.
2173 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2174 to be checked.
2176 Output:
2177 COND_EXPR - conditional expression.
2179 The returned COND_EXPR is the conditional expression to be used in the if
2180 statement that controls which version of the loop gets executed at runtime. */
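/* Illustrative shape of the result for two DDR pairs (assumed, not
   emitted verbatim): the per-pair range checks produced by
   create_intersect_range_checks are ANDed together, e.g.

     no_overlap (dr_a0, dr_b0) && no_overlap (dr_a1, dr_b1)

   where "no_overlap" stands for the disjunction of range comparisons
   shown above; the vector loop runs only when every pair is
   independent.  */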
2183 void
2184 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2186 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2187 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2188 tree part_cond_expr;
2190 if (comp_alias_ddrs.is_empty ())
2191 return;
2193 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2195 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2196 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2198 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_NOTE, vect_location,
2201 "create runtime check for data references ");
2202 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2203 dump_printf (MSG_NOTE, " and ");
2204 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2205 dump_printf (MSG_NOTE, "\n");
2208 /* Create a condition expression for each pair of data references. */
2209 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2210 if (*cond_expr)
2211 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2212 *cond_expr, part_cond_expr);
2213 else
2214 *cond_expr = part_cond_expr;
2217 if (dump_enabled_p ())
2218 dump_printf_loc (MSG_NOTE, vect_location,
2219 "created %u versioning for alias checks.\n",
2220 comp_alias_ddrs.length ());
2224 /* Function vect_loop_versioning.
2226 If the loop has data references that may or may not be aligned and/or
2227 has data reference relations whose independence was not proven, then
2228 two versions of the loop need to be generated, one which is vectorized
2229 and one which isn't. A test is then generated to control which of the
2230 loops is executed. The test checks for the alignment of all of the
2231 data references that may or may not be aligned. An additional
2232 sequence of runtime tests is generated for each pair of DDRs whose
2233 independence was not proven. The vectorized version of the loop is
2234 executed only if both the alias and the alignment tests pass.
2236 The test generated to check which version of the loop is executed
2237 is modified to also check for profitability as indicated by the
2238 cost model threshold TH.
2240 The versioning precondition(s) are placed in *COND_EXPR and
2241 *COND_EXPR_STMT_LIST. */
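/* Rough shape of the versioned CFG (a sketch, not a literal dump):

     if (cond_expr)        <-- alignment/alias/niter/profitability checks
       <vectorizable copy of the loop>
     else
       <scalar (original) loop>

   with both versions feeding the same exit block, which is split below
   to keep the loop-closed SSA form simple.  */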
2243 void
2244 vect_loop_versioning (loop_vec_info loop_vinfo,
2245 unsigned int th, bool check_profitability)
2247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2248 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2249 basic_block condition_bb;
2250 gphi_iterator gsi;
2251 gimple_stmt_iterator cond_exp_gsi;
2252 basic_block merge_bb;
2253 basic_block new_exit_bb;
2254 edge new_exit_e, e;
2255 gphi *orig_phi, *new_phi;
2256 tree cond_expr = NULL_TREE;
2257 gimple_seq cond_expr_stmt_list = NULL;
2258 tree arg;
2259 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2260 gimple_seq gimplify_stmt_list = NULL;
2261 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2262 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2263 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2264 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2266 if (check_profitability)
2267 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2268 build_int_cst (TREE_TYPE (scalar_loop_iters),
2269 th));
2271 if (version_niter)
2272 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2274 if (cond_expr)
2275 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2276 is_gimple_condexpr, NULL_TREE);
2278 if (version_align)
2279 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2280 &cond_expr_stmt_list);
2282 if (version_alias)
2283 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2285 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2286 is_gimple_condexpr, NULL_TREE);
2287 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2289 initialize_original_copy_tables ();
2290 if (scalar_loop)
2292 edge scalar_e;
2293 basic_block preheader, scalar_preheader;
2295 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2296 scale LOOP's frequencies instead. */
2297 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2298 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2299 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2300 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2301 while we need to move it above LOOP's preheader. */
2302 e = loop_preheader_edge (loop);
2303 scalar_e = loop_preheader_edge (scalar_loop);
2304 gcc_assert (empty_block_p (e->src)
2305 && single_pred_p (e->src));
2306 gcc_assert (empty_block_p (scalar_e->src)
2307 && single_pred_p (scalar_e->src));
2308 gcc_assert (single_pred_p (condition_bb));
2309 preheader = e->src;
2310 scalar_preheader = scalar_e->src;
2311 scalar_e = find_edge (condition_bb, scalar_preheader);
2312 e = single_pred_edge (preheader);
2313 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2314 scalar_preheader);
2315 redirect_edge_and_branch_force (scalar_e, preheader);
2316 redirect_edge_and_branch_force (e, condition_bb);
2317 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2318 single_pred (condition_bb));
2319 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2320 single_pred (scalar_preheader));
2321 set_immediate_dominator (CDI_DOMINATORS, preheader,
2322 condition_bb);
2324 else
2325 nloop = loop_version (loop, cond_expr, &condition_bb,
2326 prob, prob, REG_BR_PROB_BASE - prob, true);
2328 if (version_niter)
2330 /* The versioned loop could be infinite, so we need to clear the existing
2331 niter information, which was copied from the original loop. */
2332 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2333 vect_free_loop_info_assumptions (nloop);
2334 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2335 loop_constraint_set (loop, LOOP_C_INFINITE);
2338 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2339 && dump_enabled_p ())
2341 if (version_alias)
2342 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2343 "loop versioned for vectorization because of "
2344 "possible aliasing\n");
2345 if (version_align)
2346 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2347 "loop versioned for vectorization to enhance "
2348 "alignment\n");
2351 free_original_copy_tables ();
2353 /* Loop versioning violates an assumption we try to maintain during
2354 vectorization - that the loop exit block has a single predecessor.
2355 After versioning, the exit block of both loop versions is the same
2356 basic block (i.e. it has two predecessors). Just in order to simplify
2357 following transformations in the vectorizer, we fix this situation
2358 here by adding a new (empty) block on the exit-edge of the loop,
2359 with the proper loop-exit phis to maintain loop-closed-form.
2360 If loop versioning wasn't done from loop, but scalar_loop instead,
2361 merge_bb will already have just a single predecessor. */
2363 merge_bb = single_exit (loop)->dest;
2364 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2366 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2367 new_exit_bb = split_edge (single_exit (loop));
2368 new_exit_e = single_exit (loop);
2369 e = EDGE_SUCC (new_exit_bb, 0);
2371 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2373 tree new_res;
2374 orig_phi = gsi.phi ();
2375 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2376 new_phi = create_phi_node (new_res, new_exit_bb);
2377 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2378 add_phi_arg (new_phi, arg, new_exit_e,
2379 gimple_phi_arg_location_from_edge (orig_phi, e));
2380 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2384 /* End loop-exit-fixes after versioning. */
2386 if (cond_expr_stmt_list)
2388 cond_exp_gsi = gsi_last_bb (condition_bb);
2389 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2390 GSI_SAME_STMT);
2392 update_ssa (TODO_update_ssa);