/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/
/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, new_name);
}
/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
	rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src)
	  && (!rename_from_outer_loop || e->src != outer_loop->header))
	continue;
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}
struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;
/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
	continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
	  && !(bbuse == bbdef
	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
	{
	  if (new_def)
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_def);
	  else
	    {
	      gimple_debug_bind_reset_value (stmt);
	      update_stmt (stmt);
	    }
	}
    }
}
/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}
/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
	adjust_vec.safe_push (ai);
      else
	adjust_debug_stmts_now (&ai);
    }
}
/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
			gimple_bb (update_phi));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
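/* For illustration only, after this transformation a loop with NITERS = n
   tests a fresh IV rather than its original exit condition, along these
   lines (assuming the exit edge is the "true" edge):

       indx = 0;
     loop:
       ...
       indx = indx + 1;
       if (indx >= n) goto exit;
       goto loop;  */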
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
	     &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
				 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
	dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
		     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
				     build_int_cst (TREE_TYPE (niters), 1));
}
/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest coming in on those
   edges, ensure that the current_def of each TO->dest PHI argument is
   set from the corresponding FROM->dest PHI argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
	{
	  gsi_next (&gsi_from);
	  continue;
	}
      if (virtual_operand_p (to_arg))
	{
	  gsi_next (&gsi_to);
	  continue;
	}
      if (TREE_CODE (from_arg) != SSA_NAME)
	gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
	{
	  if (get_current_def (to_arg) == NULL_TREE)
	    set_current_def (to_arg, get_current_def (from_arg));
	}
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
		     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
					struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
					  exit_dest) == loop->header ?
		 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
	    &exit, 1, &new_exit, NULL,
	    at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
	 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
	 but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
	 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
	 header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
						exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
							   0),
						EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
	{
	  gphi_iterator gsi;
	  new_exit = redirect_edge_and_branch (new_exit, exit_dest);

	  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
	      location_t orig_locus
		= gimple_phi_arg_location_from_edge (phi, e);

	      add_phi_arg (phi, orig_arg, new_exit, orig_locus);
	    }
	}
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
			  single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
			       loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
	{
	  /* Remove the non-necessary forwarder of scalar_loop again.  */
	  redirect_edge_pred (single_succ_edge (preheader),
			      single_pred (preheader));
	  delete_basic_block (preheader);
	  set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
				   loop_preheader_edge (scalar_loop)->src);
	  preheader = split_edge (loop_preheader_edge (loop));
	  entry_e = single_pred_edge (preheader);
	}

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
	 one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
			  single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
			       loop_preheader_edge (new_loop)->src);
    }

  for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
	 edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
	   gsi_new = gsi_start_phis (new_loop->header);
	   !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
	   gsi_next (&gsi_orig), gsi_next (&gsi_new))
	{
	  gphi *orig_phi = gsi_orig.phi ();
	  gphi *new_phi = gsi_new.phi ();
	  tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
	  location_t orig_locus
	    = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

	  add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
	}
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}
/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  */
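/* As a sketch (illustrative only), for COND = (niters_prolog == 0) the
   guard block becomes:

     guard_bb:
       if (niters_prolog == 0)
	 goto guard_to;   ;; skip edge, taken with PROBABILITY
       else
	 goto enter;      ;; falls through towards the loop

   with the edge counts scaled so the two probabilities sum to
   REG_BR_PROB_BASE.  */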
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
		       basic_block guard_to, basic_block dom_bb,
		       int probability)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
				 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->count = guard_bb->count;
  new_e->probability = probability;
  new_e->count = apply_probability (enter_e->count, probability);
  enter_e->count -= new_e->count;
  enter_e->probability = inverse_probability (probability);
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
	gphi *phi = gsi.phi ();
	for (gsi = gsi_start_phis (exit_e->dest);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
	    break;
	if (gsi_end_p (gsi))
	  {
	    tree new_vop = copy_ssa_name (PHI_RESULT (phi));
	    gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
	    tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
	    imm_use_iterator imm_iter;
	    gimple *stmt;
	    use_operand_p use_p;

	    add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
	    gimple_phi_set_result (new_phi, new_vop);
	    FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
	      if (stmt != new_phi
		  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
		FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		  SET_USE (use_p, new_vop);
	  }
	break;
      }
}
/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location on success and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
	return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}
/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

	 Skip reduction phis.  */
      if (!iv_phi_p (phi))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi.  skip.\n");
	  continue;
	}

      /* Analyze the evolution function.  */

      evolution_part
	= STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_MISSED_OPTIMIZATION,
			 "No access function or evolution.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution not invariant in loop.\n");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "evolution is chrec.\n");
	  return false;
	}
    }

  return true;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */
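/* As a worked instance (illustrative numbers only): for an IV with
   init_expr = 0 and step_expr = 4, advancing past NITERS = 100 scalar
   iterations computes ni = 0 + 100 * 4 = 400, which becomes the value
   the epilog loop starts from.  */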
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
				  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_update_ivs_after_vectorizer: phi: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	}

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduc or virtual phi.  skip.\n");
	  continue;
	}

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a
	 polynomial of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      if (POINTER_TYPE_P (type))
	ni = fold_build_pointer_plus (init_expr, off);
      else
	ni = fold_build2 (PLUS_EXPR, type,
			  init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
	gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute the address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis / elem_size) & (VF - 1)) / step

   (elem_size = element type size; an element is the scalar element whose
   type is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (inclusive) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the
   step (which is equal to the size of the interleaved group).

   The above formulas assume that VF == number of elements in the vector.
   This may not hold when there are multiple types in the loop.  In this
   case, for some data references in the loop the VF does not represent the
   number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
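/* A worked instance of the formula above (illustrative numbers only):
   for a 16-byte vector of 4-byte elements (VF = 4, elem_size = 4,
   step = 1) and a start address misaligned by addr_mis = 8 bytes:

     elem_misalign = 8 / 4 = 2
     prolog_niters = (4 - 2) & (4 - 1) = 2

   so two scalar iterations are peeled and DR then starts on a 16-byte
   boundary.  */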
static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
			     basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
	  ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
							      &stmts, offset,
							      loop);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
      HOST_WIDE_INT elem_size =
	int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* Create:  byte_misalign = addr & (vectype_align - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
		     vectype_align_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign) & (nelements - 1)  */
      if (negative)
	iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
      else
	iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = nelements - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
	gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
	gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}
/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
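/* For instance (illustrative numbers only): if DR_STEP is 4 bytes and
   NITERS = 2 iterations were peeled, the code below yields
   DR_OFFSET + 2 * 4, accounting for the bytes consumed by the peeled
   iterations.  */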
static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
			fold_convert (sizetype, niters),
			fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
			fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}
/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
	{
	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}
/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (pe, stmts);

      return ni_name;
    }
}
/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (inclusive) of the number of iterations of the prolog loop.  VFM1
   is the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is
   the threshold below which the scalar (rather than vectorized) loop will
   be executed.  This function stores the upper bound (inclusive) of the
   result in BOUND_SCALAR.  */
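/* A rough illustration (made-up numbers): with VFM1 = 3 and a known
   prolog count INT_NITERS_PROLOG = 2, the scalar loop covers at most
   2 + 3 = 5 iterations; if profitability demands at least TH = 8
   iterations for the vector path, the bound is raised to TH - 1 = 7.  */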
static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
			     int bound_prolog, int vfm1, int th,
			     int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
			     build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
	 compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
	{
	  *bound_scalar = (int_niters_prolog + vfm1 < th
			   ? th
			   : vfm1 + int_niters_prolog);
	  return build_int_cst (type, *bound_scalar);
	}
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
	 bound (included) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
	{
	  *bound_scalar = th;
	  return build_int_cst (type, th);
	}
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
	return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}
/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */
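/* For example (illustrative only): with vf = 4 and niters = 10, the
   no-overflow form computes niters_vector = 10 >> 2 = 2, and the
   overflow-safe form computes ((10 - 4) >> 2) + 1 = 2 as well, using
   the fact that the ratio is known to be at least one.  */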
void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
			     tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
				  niters,
				  build_one_cst (TREE_TYPE (niters)));
      if (!is_gimple_val (ni_minus_gap))
	{
	  var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
	  gimple *stmts = NULL;
	  ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
					       true, var);
	  gsi_insert_seq_on_edge_immediate (pe, stmts);
	}
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf).  */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
				 ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
		     fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
				  fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
					       ni_minus_gap,
					       build_int_cst
						 (TREE_TYPE (niters), vf)),
				  log_vf),
		     build_int_cst (TREE_TYPE (niters), 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (TREE_TYPE (niters), "bnd");
      gimple *stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
    }
  *niters_vector_ptr = niters_vector;

  return;
}
/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
				     tree niters_vector,
				     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
					    niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
						    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

     preheader_a:
   first_loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto between_bb;
     latch_a:
       goto header_a;

     between_bb:
       ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

   second_loop:
     header_b:
       i_3 = PHI<i_0, i_4>;  ;; Use of i_0 to be replaced with i_x,
				or with i_2 if no LCSSA phi is created
				under condition of CREATE_LCSSA_FOR_IV_PHIS.
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto exit_bb;
     latch_b:
       goto header_b;

     exit_bb:

   This function creates loop closed SSA for the first loop; update the
   second loop's PHI nodes by replacing the argument on the incoming edge
   with the result of the newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis for the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e, between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
				   struct loop *first, struct loop *second,
				   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
	{
	  tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  gphi *lcssa_phi = create_phi_node (new_res, between_bb);
	  add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
	  arg = new_res;
	}

      /* Update PHI node in the second loop by replacing arg on the loop's
	 incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
	 goto merge_bb;
       else
	 goto skip_loop;

   skip_loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto exit_a;
     latch_a:
       goto header_a;

     exit_a:
       i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

   update_loop:
     header_b:
       i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto exit_bb;
     latch_b:
       goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the new PHI result.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
				    struct loop *update_loop,
				    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
	gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
	 args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}
/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
		gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
			   PHI_ARG_DEF (lcssa_phi, 0), 0))
	return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
   from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
   point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
   and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
   The CFG looks like:

   loop:
     header_a:
       i_1 = PHI<i_0, i_2>;
       ...
       i_2 = i_1 + 1;
       if (cond_a)
	 goto latch_a;
       else
	 goto exit_a;
     latch_a:
       goto header_a;

     exit_a:

     guard_bb:
       if (cond)
	 goto merge_bb;
       else
	 goto epilog_loop;

     ;; fall_through_bb

   epilog_loop:
     header_b:
       i_3 = PHI<i_2, i_4>;
       ...
       i_4 = i_3 + 1;
       if (cond_b)
	 goto latch_b;
       else
	 goto merge_bb;
     latch_b:
       goto header_b;

     merge_bb:
       ;; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.

     exit_bb:
       i_x = PHI<i_4>;  ;; Use of i_4 to be replaced with i_y in merge_bb.

   For each name used outside EPILOG (i.e. - for each name that has a lcssa
   phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
   args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
   the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
   by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
   in exit_bb will also be updated.  */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
				    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
	 loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
	continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
	merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after loop but not a reduction, we simply
	 use the old arg.  */
      if (!guard_arg)
	guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
	 the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}
/* EPILOG loop is duplicated from the original loop for vectorizing,
   the arg of its loop closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represent a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
	 loop_body
	 if (exit_loop_cond) goto exit_bb
	 else goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
     CHECK_PROFITABILITY is true.

   Output:
   - NITERS_VECTOR: The number of iterations of loop after vectorization.

   This function peels prolog and epilog from the loop, adds guards skipping
   PROLOG and EPILOG for various conditions.  As a result, the changed CFG
   would look like:

       guard_bb_1:
	 if (prefer_scalar_loop) goto merge_bb_1
	 else goto guard_bb_2

       guard_bb_2:
	 if (skip_prolog) goto merge_bb_2
	 else goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
	 prolog_body
	 if (exit_prolog_cond) goto prolog_exit_bb
	 else goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
	 vector_body
	 if (exit_vector_cond) goto vector_exit_bb
	 else goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
	 if (skip_epilog) goto merge_bb_3
	 else goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
	 epilog_body
	 if (exit_epilog_cond) goto merge_bb_3
	 else goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns the created epilogue or NULL.

   TODO: Guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
1623 struct loop *
1624 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1625 tree *niters_vector, int th, bool check_profitability,
1626 bool niters_no_overflow)
1628 edge e, guard_e;
1629 tree type = TREE_TYPE (niters), guard_cond;
1630 basic_block guard_bb, guard_to;
1631 int prob_prolog, prob_vector, prob_epilog;
1632 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1633 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1634 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1635 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1636 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1638 if (!prolog_peeling && !epilog_peeling)
1639 return NULL;
1641 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1642 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1643 vf = 3;
1644 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1645 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1647 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1648 struct loop *first_loop = loop;
1649 create_lcssa_for_virtual_phi (loop);
1650 update_ssa (TODO_update_ssa_only_virtuals);
1652 if (MAY_HAVE_DEBUG_STMTS)
1654 gcc_assert (!adjust_vec.exists ());
1655 adjust_vec.create (32);
1657 initialize_original_copy_tables ();
1659 /* Prolog loop may be skipped. */
1660 bool skip_prolog = (prolog_peeling != 0);
1661 /* Skip to epilog if scalar loop may be preferred. It's only used when
1662 we peel for epilog loop. */
1663 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1664 /* Epilog loop must be executed if the number of iterations for epilog
1665 loop is known at compile time, otherwise we need to add a check at
1666 the end of vector loop and skip to the end of epilog loop. */
1667 bool skip_epilog = (prolog_peeling < 0
1668 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1669 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1670 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1671 skip_epilog = false;
1673 /* Record the anchor bb at which guard should be placed if scalar loop
1674 may be preferred. */
1675 basic_block anchor = loop_preheader_edge (loop)->src;
1676 if (skip_vector)
1677 split_edge (loop_preheader_edge (loop));
1679 tree niters_prolog = build_int_cst (type, 0);
1680 source_location loop_loc = find_loop_location (loop);
1681 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1682 if (prolog_peeling)
1684 e = loop_preheader_edge (loop);
1685 if (!slpeel_can_duplicate_loop_p (loop, e))
1687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1688 "loop can't be duplicated to preheader edge.\n");
1689 gcc_unreachable ();
1691 /* Peel prolog and put it on preheader edge of loop. */
1692 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1693 if (!prolog)
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1696 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1697 gcc_unreachable ();
1699 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1700 first_loop = prolog;
1701 reset_original_copy_tables ();
1703 /* Generate and update the number of iterations for prolog loop. */
1704 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1705 &bound_prolog);
1706 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1708 /* Skip the prolog loop. */
1709 if (skip_prolog)
1711 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1712 niters_prolog, build_int_cst (type, 0));
1713 guard_bb = loop_preheader_edge (prolog)->src;
1714 guard_to = split_edge (loop_preheader_edge (loop));
1715 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1716 guard_to, guard_bb,
1717 inverse_probability (prob_prolog));
1718 e = EDGE_PRED (guard_to, 0);
1719 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1720 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1721 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1723 /* Update init address of DRs. */
1724 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1725 /* Update niters for vector loop. */
1726 LOOP_VINFO_NITERS (loop_vinfo)
1727 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1728 LOOP_VINFO_NITERSM1 (loop_vinfo)
1729 = fold_build2 (MINUS_EXPR, type,
1730 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1731 niters = vect_build_loop_niters (loop_vinfo);
1733 /* Prolog iterates at most bound_prolog times, latch iterates at
1734 most bound_prolog - 1 times. */
1735 record_niter_bound (prolog, bound_prolog - 1, false, true);
1736 delete_update_ssa ();
1737 adjust_vec_debug_stmts ();
1738 scev_reset ();
1741 if (epilog_peeling)
1743 e = single_exit (loop);
1744 if (!slpeel_can_duplicate_loop_p (loop, e))
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1747 "loop can't be duplicated to exit edge.\n");
1748 gcc_unreachable ();
1750 /* Peel epilog and put it on exit edge of loop. */
1751 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1752 if (!epilog)
1754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1755 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1756 gcc_unreachable ();
1758 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1760 /* Scalar version loop may be preferred. In this case, add guard
1761 and skip to epilog. Note this only happens when the number of
1762 iterations of loop is unknown at compile time, otherwise this
1763 won't be vectorized. */
1764 if (skip_vector)
1766 /* An additional epilogue iteration is peeled if a gap exists. */
1767 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1768 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1769 bound_prolog,
1770 peel_for_gaps ? vf : vf - 1,
1771 th, &bound_scalar,
1772 check_profitability);
1773 /* Build guard against NITERSM1 since NITERS may overflow. */
1774 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1775 guard_bb = anchor;
1776 guard_to = split_edge (loop_preheader_edge (epilog));
1777 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1778 guard_to, guard_bb,
1779 inverse_probability (prob_vector));
1780 e = EDGE_PRED (guard_to, 0);
1781 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1782 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1783 scale_loop_profile (epilog, prob_vector, bound_scalar);
1786 tree niters_vector_mult_vf;
1787 /* If the loop is peeled a non-zero constant number of times, NITERS now
1788 refers to orig_niters - prolog_peeling; it won't overflow even if
1789 orig_niters overflows. */
1790 niters_no_overflow |= (prolog_peeling > 0);
1791 vect_gen_vector_loop_niters (loop_vinfo, niters,
1792 niters_vector, niters_no_overflow);
1793 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1794 &niters_vector_mult_vf);
1795 /* Update IVs of original loop as if they were advanced by
1796 niters_vector_mult_vf steps. */
1797 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1798 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1799 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1800 update_e);
1802 if (skip_epilog)
1804 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1805 niters, niters_vector_mult_vf);
1806 guard_bb = single_exit (loop)->dest;
1807 guard_to = split_edge (single_exit (epilog));
1808 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1809 skip_vector ? anchor : guard_bb,
1810 inverse_probability (prob_epilog));
1811 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1812 single_exit (epilog));
1813 scale_loop_profile (epilog, prob_epilog, bound);
1815 else
1816 slpeel_update_phi_nodes_for_lcssa (epilog);
1818 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1819 /* We share epilog loop with scalar version loop. */
1820 bound = MAX (bound, bound_scalar - 1);
1821 record_niter_bound (epilog, bound, false, true);
1823 delete_update_ssa ();
1824 adjust_vec_debug_stmts ();
1825 scev_reset ();
1827 adjust_vec.release ();
1828 free_original_copy_tables ();
1830 return epilog;
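/* Illustrative sketch only, not part of the vectorizer: a source-level
   view of the prolog/vector/epilog structure built above, with the
   skip_prolog and skip_epilog guards made explicit (the skip_vector
   guard, which can bypass the vector loop entirely, is omitted for
   brevity).  All names are hypothetical; the sketch assumes
   niters_prolog <= n and vf >= 1, and `a[i] += 1' stands for one
   iteration of the original loop.  */
#if 0
static void
peeled_loop_shape (int *a, unsigned n, unsigned niters_prolog, unsigned vf)
{
  unsigned i = 0;
  /* Prolog loop; skipped entirely when no peeling is needed.  */
  if (niters_prolog != 0)
    for (; i < niters_prolog; i++)
      a[i] += 1;
  /* Vector loop; each step consumes VF elements.  */
  unsigned niters_vector_mult_vf = ((n - i) / vf) * vf;
  for (unsigned end = i + niters_vector_mult_vf; i < end; i += vf)
    for (unsigned k = 0; k < vf; k++)	/* one VF-wide vector step */
      a[i + k] += 1;
  /* Epilog loop; skipped when the vector loop consumed all iterations.  */
  if (i != n)
    for (; i < n; i++)
      a[i] += 1;
}
#endif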
1833 /* Function vect_create_cond_for_niters_checks.
1835 Create a conditional expression that represents the run-time checks for
1836 loop's niter. The loop is guaranteed to terminate if the run-time
1837 checks hold.
1839 Input:
1840 COND_EXPR - input conditional expression.  New conditions will be
1841 chained with logical AND operation.
1842 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the
1843 assumptions that must hold for the loop's niter
1844 information to be valid.
1846 Output:
1847 COND_EXPR - conditional expression.
1849 The returned COND_EXPR is the conditional expression to be used in the
1850 if statement that controls which version of the loop gets executed at
1851 runtime. */
1853 static void
1854 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1856 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1858 if (*cond_expr)
1859 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1860 *cond_expr, part_cond_expr);
1861 else
1862 *cond_expr = part_cond_expr;
1865 /* Function vect_create_cond_for_align_checks.
1867 Create a conditional expression that represents the alignment checks for
1868 all of the data references (array element references) whose alignment must be
1869 checked at runtime.
1871 Input:
1872 COND_EXPR - input conditional expression. New conditions will be chained
1873 with logical AND operation.
1874 LOOP_VINFO - two fields of the loop information are used.
1875 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1876 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1878 Output:
1879 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1880 expression.
1881 The returned value is the conditional expression to be used in the if
1882 statement that controls which version of the loop gets executed at runtime.
1884 The algorithm makes two assumptions:
1885 1) The number of bytes "n" in a vector is a power of 2.
1886 2) An address "a" is aligned if a%n is zero, and this
1887 test can be done as a&(n-1) == 0. For example, for 16
1888 byte vectors the test is a&0xf == 0. */
1890 static void
1891 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1892 tree *cond_expr,
1893 gimple_seq *cond_expr_stmt_list)
1895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896 vec<gimple *> may_misalign_stmts
1897 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1898 gimple *ref_stmt;
1899 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1900 tree mask_cst;
1901 unsigned int i;
1902 tree int_ptrsize_type;
1903 char tmp_name[20];
1904 tree or_tmp_name = NULL_TREE;
1905 tree and_tmp_name;
1906 gimple *and_stmt;
1907 tree ptrsize_zero;
1908 tree part_cond_expr;
1910 /* Check that mask is one less than a power of 2, i.e., mask is
1911 all zeros followed by all ones. */
1912 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1914 int_ptrsize_type = signed_type_for (ptr_type_node);
1916 /* Create the expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1917 of the first vector of the i'th data reference. */
1919 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1921 gimple_seq new_stmt_list = NULL;
1922 tree addr_base;
1923 tree addr_tmp_name;
1924 tree new_or_tmp_name;
1925 gimple *addr_stmt, *or_stmt;
1926 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1927 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1928 bool negative = tree_int_cst_compare
1929 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1930 tree offset = negative
1931 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1933 /* create: addr_tmp = (int)(address_of_first_vector) */
1934 addr_base =
1935 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1936 offset, loop);
1937 if (new_stmt_list != NULL)
1938 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1940 sprintf (tmp_name, "addr2int%u", i);
1941 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1942 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1943 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1945 /* The addresses are ORed together. */
1947 if (or_tmp_name != NULL_TREE)
1949 /* create: or_tmp = or_tmp | addr_tmp */
1950 sprintf (tmp_name, "orptrs%u", i);
1951 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1952 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1953 or_tmp_name, addr_tmp_name);
1954 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1955 or_tmp_name = new_or_tmp_name;
1957 else
1958 or_tmp_name = addr_tmp_name;
1960 } /* end for i */
1962 mask_cst = build_int_cst (int_ptrsize_type, mask);
1964 /* create: and_tmp = or_tmp & mask */
1965 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1967 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1968 or_tmp_name, mask_cst);
1969 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1971 /* Make and_tmp the left operand of the conditional test against zero.
1972 If and_tmp has a nonzero bit then some address is unaligned. */
1973 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1974 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1975 and_tmp_name, ptrsize_zero);
1976 if (*cond_expr)
1977 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1978 *cond_expr, part_cond_expr);
1979 else
1980 *cond_expr = part_cond_expr;
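/* Illustrative sketch only (hypothetical helper, not GCC code): at run
   time the sequence built above amounts to ORing all the data-reference
   addresses together, ANDing with the vector-size-minus-one mask, and
   comparing against zero.  */
#if 0
static bool
all_refs_aligned (unsigned long mask, const unsigned long *addrs, unsigned n)
{
  unsigned long or_tmp = 0;
  for (unsigned i = 0; i < n; i++)
    or_tmp |= addrs[i];		/* or_tmp = dr_1 | ... | dr_n */
  /* A nonzero (or_tmp & mask) means some address is unaligned.  */
  return (or_tmp & mask) == 0;
}
#endif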
1983 /* Given two data references and segment lengths described by DR_A and DR_B,
1984 create an expression checking whether the two address ranges intersect,
1985 based on the indices of the two accesses. This can only be done if
1986 DR_A and DR_B refer to the same (array) object and the index is the
1987 only difference. For example:
1989 DR_A DR_B
1990 data-ref arr[i] arr[j]
1991 base_object arr arr
1992 index {i_0, +, 1}_loop {j_0, +, 1}_loop
1994 The addresses and their index are like:
1996 |<- ADDR_A ->| |<- ADDR_B ->|
1997 ------------------------------------------------------->
1998 | | | | | | | | | |
1999 ------------------------------------------------------->
2000 i_0 ... i_0+4 j_0 ... j_0+4
2002 We can create an expression based on the index rather than the address:
2004 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2006 Note the evolution step of the index must be considered in the comparison. */
2008 static bool
2009 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2010 const dr_with_seg_len& dr_a,
2011 const dr_with_seg_len& dr_b)
2013 if (integer_zerop (DR_STEP (dr_a.dr))
2014 || integer_zerop (DR_STEP (dr_b.dr))
2015 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2016 return false;
2018 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2019 return false;
2021 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2022 return false;
2024 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2025 return false;
2027 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2028 return false;
2030 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2032 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2033 unsigned HOST_WIDE_INT abs_step
2034 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2036 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2037 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2038 /* Infer the number of iterations with which the memory segment is accessed
2039 by DR. In other words, aliasing is checked if the memory segment
2040 accessed by DR_A in some iterations intersects with the memory segment
2041 accessed by DR_B in the same number of iterations.
2042 Note the segment length is a linear function of the number of iterations
2043 with DR_STEP as the coefficient. */
2044 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2045 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2047 unsigned int i;
2048 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2049 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2051 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2052 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2053 /* The two indices must be the same if they are not SCEVs, or are not
2054 SCEVs with respect to the current loop being vectorized. */
2055 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2056 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2057 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2058 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2060 if (operand_equal_p (access1, access2, 0))
2061 continue;
2063 return false;
2065 /* The two indices must have the same step. */
2066 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2067 return false;
2069 tree idx_step = CHREC_RIGHT (access1);
2070 /* Index must have const step, otherwise DR_STEP won't be constant. */
2071 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2072 /* Index must evaluate in the same direction as DR. */
2073 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2075 tree min1 = CHREC_LEFT (access1);
2076 tree min2 = CHREC_LEFT (access2);
2077 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2078 return false;
2080 /* Ideally, aliasing could be checked against the loop's control IV, but
2081 that requires proving a linear mapping between the control IV and the
2082 reference index. Although that should be true, we check against the
2083 (array) index of the data reference instead. Like the segment length,
2084 the index length is a linear function of the number of iterations with
2085 idx_step as the coefficient, i.e., niter_len * idx_step. */
2086 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2087 build_int_cst (TREE_TYPE (min1),
2088 niter_len1));
2089 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2090 build_int_cst (TREE_TYPE (min2),
2091 niter_len2));
2092 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2093 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2094 /* Adjust ranges for negative step. */
2095 if (neg_step)
2097 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2098 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2099 CHREC_LEFT (access1), idx_step);
2100 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2101 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2102 CHREC_LEFT (access2), idx_step);
2104 tree part_cond_expr
2105 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2106 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2107 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2108 if (*cond_expr)
2109 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2110 *cond_expr, part_cond_expr);
2111 else
2112 *cond_expr = part_cond_expr;
2114 return true;
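/* Illustrative sketch only (hypothetical names): for unit-step accesses
   arr[i] and arr[j], the condition built above reduces to checking that
   the index ranges [i0, i0 + niter_len) and [j0, j0 + niter_len) do not
   overlap.  */
#if 0
static bool
index_ranges_disjoint (long i0, long j0, long niter_len)
{
  /* Mirrors (max1 <= min2 || max2 <= min1) with max = min + len.  */
  return i0 + niter_len <= j0 || j0 + niter_len <= i0;
}
#endif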
2117 /* Given two data references and segment lengths described by DR_A and DR_B,
2118 create an expression checking whether the two address ranges intersect:
2121 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2122 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
2124 static void
2125 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2126 const dr_with_seg_len& dr_a,
2127 const dr_with_seg_len& dr_b)
2129 *cond_expr = NULL_TREE;
2130 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2131 return;
2133 tree segment_length_a = dr_a.seg_len;
2134 tree segment_length_b = dr_b.seg_len;
2135 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2136 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2137 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2139 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2140 offset_a, DR_INIT (dr_a.dr));
2141 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2142 offset_b, DR_INIT (dr_b.dr));
2143 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2144 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2146 tree seg_a_min = addr_base_a;
2147 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2148 /* For a negative step, we need to adjust the address range by
2149 TYPE_SIZE_UNIT bytes; e.g., for int accesses running from a[3] down to
2150 a[1], the range is [a+4, a+16) instead of [a, a+12). */
2151 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2153 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2154 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2155 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2158 tree seg_b_min = addr_base_b;
2159 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2160 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2162 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2163 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2164 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2166 *cond_expr
2167 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2168 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2169 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
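/* Illustrative sketch only (hypothetical helper): the expression built
   above is the classic interval-disjointness test on the two address
   segments [a_min, a_max) and [b_min, b_max).  */
#if 0
static bool
segments_disjoint (unsigned long a_min, unsigned long a_max,
		   unsigned long b_min, unsigned long b_max)
{
  return a_max <= b_min || b_max <= a_min;
}
#endif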
2172 /* Function vect_create_cond_for_alias_checks.
2174 Create a conditional expression that represents the run-time checks for
2175 overlapping of address ranges represented by a list of data reference
2176 relations passed as input.
2178 Input:
2179 COND_EXPR - input conditional expression. New conditions will be
2180 chained with logical AND operation; if it is NULL, the
2181 first condition is stored directly.
2182 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddr
2183 pairs to be checked.
2185 Output:
2186 COND_EXPR - conditional expression.
2188 The returned COND_EXPR is the conditional expression to be used in the if
2189 statement that controls which version of the loop gets executed at runtime.
2192 void
2193 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2195 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2196 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2197 tree part_cond_expr;
2199 if (comp_alias_ddrs.is_empty ())
2200 return;
2202 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2204 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2205 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2207 if (dump_enabled_p ())
2209 dump_printf_loc (MSG_NOTE, vect_location,
2210 "create runtime check for data references ");
2211 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2212 dump_printf (MSG_NOTE, " and ");
2213 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2214 dump_printf (MSG_NOTE, "\n");
2217 /* Create a condition expression for each pair of data references. */
2218 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2219 if (*cond_expr)
2220 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2221 *cond_expr, part_cond_expr);
2222 else
2223 *cond_expr = part_cond_expr;
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_NOTE, vect_location,
2228 "created %u versioning for alias checks.\n",
2229 comp_alias_ddrs.length ());
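/* Illustrative sketch only (hypothetical types and names): the loop
   above chains the pairwise range checks with logical AND, so the
   runtime condition for taking the vectorized loop behaves like
   this.  */
#if 0
struct seg_pair { unsigned long a_min, a_max, b_min, b_max; };

static bool
all_pairs_disjoint (const struct seg_pair *pairs, unsigned n)
{
  bool cond = true;
  for (unsigned i = 0; i < n; i++)
    cond = cond && (pairs[i].a_max <= pairs[i].b_min
		    || pairs[i].b_max <= pairs[i].a_min);
  return cond;
}
#endif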
2233 /* Function vect_loop_versioning.
2235 If the loop has data references that may or may not be aligned and/or
2236 has data reference relations whose independence was not proven, then
2237 two versions of the loop need to be generated, one which is vectorized
2238 and one which isn't. A test is then generated to control which of the
2239 loops is executed. The test checks for the alignment of all of the
2240 data references that may or may not be aligned. An additional
2241 sequence of runtime tests is generated for each pair of DDRs whose
2242 independence was not proven. The vectorized version of the loop is
2243 executed only if both the alias and alignment tests pass.
2245 The test generated to check which version of the loop is executed
2246 is modified to also check for profitability as indicated by the
2247 cost model threshold TH.
2249 The versioning precondition(s) are placed in *COND_EXPR and
2250 *COND_EXPR_STMT_LIST. */
2252 void
2253 vect_loop_versioning (loop_vec_info loop_vinfo,
2254 unsigned int th, bool check_profitability)
2256 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2257 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2258 basic_block condition_bb;
2259 gphi_iterator gsi;
2260 gimple_stmt_iterator cond_exp_gsi;
2261 basic_block merge_bb;
2262 basic_block new_exit_bb;
2263 edge new_exit_e, e;
2264 gphi *orig_phi, *new_phi;
2265 tree cond_expr = NULL_TREE;
2266 gimple_seq cond_expr_stmt_list = NULL;
2267 tree arg;
2268 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2269 gimple_seq gimplify_stmt_list = NULL;
2270 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2271 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2272 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2273 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2275 if (check_profitability)
2276 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2277 build_int_cst (TREE_TYPE (scalar_loop_iters),
2278 th));
2280 if (version_niter)
2281 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2283 if (cond_expr)
2284 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2285 is_gimple_condexpr, NULL_TREE);
2287 if (version_align)
2288 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2289 &cond_expr_stmt_list);
2291 if (version_alias)
2292 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2294 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2295 is_gimple_condexpr, NULL_TREE);
2296 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2298 initialize_original_copy_tables ();
2299 if (scalar_loop)
2301 edge scalar_e;
2302 basic_block preheader, scalar_preheader;
2304 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2305 scale LOOP's frequencies instead. */
2306 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2307 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2308 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2309 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2310 while we need to move it above LOOP's preheader. */
2311 e = loop_preheader_edge (loop);
2312 scalar_e = loop_preheader_edge (scalar_loop);
2313 gcc_assert (empty_block_p (e->src)
2314 && single_pred_p (e->src));
2315 gcc_assert (empty_block_p (scalar_e->src)
2316 && single_pred_p (scalar_e->src));
2317 gcc_assert (single_pred_p (condition_bb));
2318 preheader = e->src;
2319 scalar_preheader = scalar_e->src;
2320 scalar_e = find_edge (condition_bb, scalar_preheader);
2321 e = single_pred_edge (preheader);
2322 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2323 scalar_preheader);
2324 redirect_edge_and_branch_force (scalar_e, preheader);
2325 redirect_edge_and_branch_force (e, condition_bb);
2326 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2327 single_pred (condition_bb));
2328 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2329 single_pred (scalar_preheader));
2330 set_immediate_dominator (CDI_DOMINATORS, preheader,
2331 condition_bb);
2333 else
2334 nloop = loop_version (loop, cond_expr, &condition_bb,
2335 prob, prob, REG_BR_PROB_BASE - prob, true);
2337 if (version_niter)
2339 /* The versioned loop could be infinite; we need to clear the existing
2340 niter information, which was copied from the original loop. */
2341 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2342 vect_free_loop_info_assumptions (nloop);
2343 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2344 loop_constraint_set (loop, LOOP_C_INFINITE);
2347 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2348 && dump_enabled_p ())
2350 if (version_alias)
2351 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2352 "loop versioned for vectorization because of "
2353 "possible aliasing\n");
2354 if (version_align)
2355 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2356 "loop versioned for vectorization to enhance "
2357 "alignment\n");
2360 free_original_copy_tables ();
2362 /* Loop versioning violates an assumption we try to maintain during
2363 vectorization - that the loop exit block has a single predecessor.
2364 After versioning, the exit block of both loop versions is the same
2365 basic block (i.e. it has two predecessors). Just in order to simplify
2366 following transformations in the vectorizer, we fix this situation
2367 here by adding a new (empty) block on the exit-edge of the loop,
2368 with the proper loop-exit phis to maintain loop-closed-form.
2369 If loop versioning wasn't done from LOOP but from SCALAR_LOOP instead,
2370 merge_bb will already have just a single predecessor. */
2372 merge_bb = single_exit (loop)->dest;
2373 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2375 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2376 new_exit_bb = split_edge (single_exit (loop));
2377 new_exit_e = single_exit (loop);
2378 e = EDGE_SUCC (new_exit_bb, 0);
2380 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2382 tree new_res;
2383 orig_phi = gsi.phi ();
2384 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2385 new_phi = create_phi_node (new_res, new_exit_bb);
2386 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2387 add_phi_arg (new_phi, arg, new_exit_e,
2388 gimple_phi_arg_location_from_edge (orig_phi, e));
2389 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2393 /* End loop-exit-fixes after versioning. */
2395 if (cond_expr_stmt_list)
2397 cond_exp_gsi = gsi_last_bb (condition_bb);
2398 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2399 GSI_SAME_STMT);
2401 update_ssa (TODO_update_ssa);
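/* Illustrative sketch only (hypothetical names, not GCC code): after
   versioning, control flow at the source level looks like the
   following, where COND combines the profitability, niter, alignment
   and alias checks gathered above.  */
#if 0
static void
versioned_loop_shape (int *a, const int *b, long n, bool cond)
{
  if (cond)
    /* Version that will be vectorized.  */
    for (long i = 0; i < n; i++)
      a[i] += b[i];
  else
    /* Scalar fallback version.  */
    for (long i = 0; i < n; i++)
      a[i] += b[i];
}
#endif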