gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one on each iteration, and has NITERS as its limit.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
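/* Illustrative example (not part of the original sources): if the loop
   exits on the true edge of its condition, the rewritten exit test is
   "if (indx_after_incr >= NITERS) goto exit", where indx_after_incr is
   the new IV after its increment.  Starting from zero and stepping by
   one, the loop body therefore executes exactly NITERS times.  */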
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest coming from those
312 edges, ensure that each TO->dest PHI argument has its current_def
313 set to that of the corresponding FROM argument. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. */
539 static edge
540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
541 basic_block guard_to, basic_block dom_bb,
542 int probability)
544 gimple_stmt_iterator gsi;
545 edge new_e, enter_e;
546 gcond *cond_stmt;
547 gimple_seq gimplify_stmt_list = NULL;
549 enter_e = EDGE_SUCC (guard_bb, 0);
550 enter_e->flags &= ~EDGE_FALLTHRU;
551 enter_e->flags |= EDGE_FALSE_VALUE;
552 gsi = gsi_last_bb (guard_bb);
554 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
555 NULL_TREE);
556 if (gimplify_stmt_list)
557 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
560 gsi = gsi_last_bb (guard_bb);
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 /* Add new edge to connect guard block to the merge/loop-exit block. */
564 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 new_e->count = guard_bb->count;
567 new_e->probability = probability;
568 new_e->count = apply_probability (enter_e->count, probability);
569 enter_e->count -= new_e->count;
570 enter_e->probability = inverse_probability (probability);
571 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
573 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
574 if (enter_e->dest->loop_father->header == enter_e->dest)
575 split_edge (enter_e);
577 return new_e;
581 /* This function verifies that the following restrictions apply to LOOP:
582 (1) it consists of exactly 2 basic blocks (header and an empty latch) for
583 an innermost loop, or 5 basic blocks for an outer loop.
584 (2) it is single entry, single exit
585 (3) its exit condition is the last stmt in the header
586 (4) E is the entry/exit edge of LOOP. */
589 bool
590 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
592 edge exit_e = single_exit (loop);
593 edge entry_e = loop_preheader_edge (loop);
594 gcond *orig_cond = get_loop_exit_condition (loop);
595 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
596 unsigned int num_bb = loop->inner? 5 : 2;
598 /* All loops have an outer scope; the only case in which loop_outer (loop)
599 is NULL is for the function itself. */
600 if (!loop_outer (loop)
601 || loop->num_nodes != num_bb
602 || !empty_block_p (loop->latch)
603 || !single_exit (loop)
604 /* Verify that new loop exit condition can be trivially modified. */
605 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
606 || (e != exit_e && e != entry_e))
607 return false;
609 return true;
612 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
613 in the exit bb and rename all the uses after the loop. This simplifies
614 the *guard[12] routines, which assume loop closed SSA form for all PHIs
615 (but normally loop closed SSA form doesn't require virtual PHIs to be
616 in the same form). Doing this early simplifies checking which
617 uses should be renamed. */
619 static void
620 create_lcssa_for_virtual_phi (struct loop *loop)
622 gphi_iterator gsi;
623 edge exit_e = single_exit (loop);
625 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
626 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
628 gphi *phi = gsi.phi ();
629 for (gsi = gsi_start_phis (exit_e->dest);
630 !gsi_end_p (gsi); gsi_next (&gsi))
631 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
632 break;
633 if (gsi_end_p (gsi))
635 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
636 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
637 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
638 imm_use_iterator imm_iter;
639 gimple *stmt;
640 use_operand_p use_p;
642 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
643 gimple_phi_set_result (new_phi, new_vop);
644 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
645 if (stmt != new_phi
646 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
647 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
648 SET_USE (use_p, new_vop);
650 break;
655 /* Function find_loop_location.
657 Extract the location of the loop in the source code.
658 If the loop is not well formed for vectorization, an estimated
659 location is calculated.
660 Return the loop location on success and UNKNOWN_LOCATION otherwise. */
662 source_location
663 find_loop_location (struct loop *loop)
665 gimple *stmt = NULL;
666 basic_block bb;
667 gimple_stmt_iterator si;
669 if (!loop)
670 return UNKNOWN_LOCATION;
672 stmt = get_loop_exit_condition (loop);
674 if (stmt
675 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
676 return gimple_location (stmt);
678 /* If we got here the loop is probably not "well formed",
679 try to estimate the loop location */
681 if (!loop->header)
682 return UNKNOWN_LOCATION;
684 bb = loop->header;
686 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
688 stmt = gsi_stmt (si);
689 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
690 return gimple_location (stmt);
693 return UNKNOWN_LOCATION;
696 /* Return true if PHI defines an IV of the loop to be vectorized. */
698 static bool
699 iv_phi_p (gphi *phi)
701 if (virtual_operand_p (PHI_RESULT (phi)))
702 return false;
704 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
705 gcc_assert (stmt_info != NULL);
706 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
707 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
708 return false;
710 return true;
713 /* Function vect_can_advance_ivs_p
715 In case the number of iterations that LOOP iterates is unknown at compile
716 time, an epilog loop will be generated, and the loop induction variables
717 (IVs) will be "advanced" to the value they are supposed to take just before
718 the epilog loop. Here we check that the access function of the loop IVs
719 and the expression that represents the loop bound are simple enough.
720 These restrictions will be relaxed in the future. */
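/* Illustrative example (hypothetical IVs): an IV with evolution
   {base, +, 4} has evolution part 4, which is loop-invariant and accepted;
   an IV whose evolution part is itself a chrec, e.g. {1, +, 1}, describes a
   polynomial of degree 2 and is rejected below.  */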
722 bool
723 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
725 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
726 basic_block bb = loop->header;
727 gphi_iterator gsi;
729 /* Analyze phi functions of the loop header. */
731 if (dump_enabled_p ())
732 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
733 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
735 tree evolution_part;
737 gphi *phi = gsi.phi ();
738 if (dump_enabled_p ())
740 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
744 /* Skip virtual phi's. The data dependences that are associated with
745 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
747 Skip reduction phis. */
748 if (!iv_phi_p (phi))
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_NOTE, vect_location,
752 "reduc or virtual phi. skip.\n");
753 continue;
756 /* Analyze the evolution function. */
758 evolution_part
759 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
760 if (evolution_part == NULL_TREE)
762 if (dump_enabled_p ())
763 dump_printf (MSG_MISSED_OPTIMIZATION,
764 "No access function or evolution.\n");
765 return false;
768 /* FORNOW: We do not transform initial conditions of IVs
769 which evolution functions are not invariants in the loop. */
771 if (!expr_invariant_in_loop_p (loop, evolution_part))
773 if (dump_enabled_p ())
774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
775 "evolution not invariant in loop.\n");
776 return false;
779 /* FORNOW: We do not transform initial conditions of IVs
780 which evolution functions are a polynomial of degree >= 2. */
782 if (tree_is_chrec (evolution_part))
784 if (dump_enabled_p ())
785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
786 "evolution is chrec.\n");
787 return false;
791 return true;
795 /* Function vect_update_ivs_after_vectorizer.
797 "Advance" the induction variables of LOOP to the value they should take
798 after the execution of LOOP. This is currently necessary because the
799 vectorizer does not handle induction variables that are used after the
800 loop. Such a situation occurs when the last iterations of LOOP are
801 peeled, because:
802 1. We introduced new uses after LOOP for IVs that were not originally used
803 after LOOP: the IVs of LOOP are now used by an epilog loop.
804 2. LOOP is going to be vectorized; this means that it will iterate N/VF
805 times, whereas the loop IVs should be bumped N times.
807 Input:
808 - LOOP - a loop that is going to be vectorized. The last few iterations
809 of LOOP were peeled.
810 - NITERS - the number of iterations that LOOP executes (before it is
811 vectorized). i.e, the number of times the ivs should be bumped.
812 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
813 coming out from LOOP on which there are uses of the LOOP ivs
814 (this is the path from LOOP->exit to epilog_loop->preheader).
816 The new definitions of the ivs are placed in LOOP->exit.
817 The phi args associated with the edge UPDATE_E in the bb
818 UPDATE_E->dest are updated accordingly.
820 Assumption 1: Like the rest of the vectorizer, this function assumes
821 a single loop exit that has a single predecessor.
823 Assumption 2: The phi nodes in the LOOP header and in update_bb are
824 organized in the same order.
826 Assumption 3: The access function of the ivs is simple enough (see
827 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
829 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
830 coming out of LOOP on which the ivs of LOOP are used (this is the path
831 that leads to the epilog loop; other paths skip the epilog loop). This
832 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
833 needs to have its phis updated.  */
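/* Illustrative example (hypothetical values): for an IV described by
   {init, +, step}, the value expected at the start of the epilog is
   init + NITERS * step; e.g. with init 0, step 4 and NITERS 10 the
   epilog starts the IV at 40.  */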
836 static void
837 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
838 tree niters, edge update_e)
840 gphi_iterator gsi, gsi1;
841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
842 basic_block update_bb = update_e->dest;
843 basic_block exit_bb = single_exit (loop)->dest;
845 /* Make sure there exists a single-predecessor exit bb: */
846 gcc_assert (single_pred_p (exit_bb));
847 gcc_assert (single_succ_edge (exit_bb) == update_e);
849 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
850 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
851 gsi_next (&gsi), gsi_next (&gsi1))
853 tree init_expr;
854 tree step_expr, off;
855 tree type;
856 tree var, ni, ni_name;
857 gimple_stmt_iterator last_gsi;
859 gphi *phi = gsi.phi ();
860 gphi *phi1 = gsi1.phi ();
861 if (dump_enabled_p ())
863 dump_printf_loc (MSG_NOTE, vect_location,
864 "vect_update_ivs_after_vectorizer: phi: ");
865 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
868 /* Skip reduction and virtual phis. */
869 if (!iv_phi_p (phi))
871 if (dump_enabled_p ())
872 dump_printf_loc (MSG_NOTE, vect_location,
873 "reduc or virtual phi. skip.\n");
874 continue;
877 type = TREE_TYPE (gimple_phi_result (phi));
878 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
879 step_expr = unshare_expr (step_expr);
881 /* FORNOW: We do not support IVs whose evolution function is a polynomial
882 of degree >= 2 or exponential. */
883 gcc_assert (!tree_is_chrec (step_expr));
885 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
887 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
888 fold_convert (TREE_TYPE (step_expr), niters),
889 step_expr);
890 if (POINTER_TYPE_P (type))
891 ni = fold_build_pointer_plus (init_expr, off);
892 else
893 ni = fold_build2 (PLUS_EXPR, type,
894 init_expr, fold_convert (type, off));
896 var = create_tmp_var (type, "tmp");
898 last_gsi = gsi_last_bb (exit_bb);
899 gimple_seq new_stmts = NULL;
900 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
901 /* Exit_bb shouldn't be empty. */
902 if (!gsi_end_p (last_gsi))
903 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
904 else
905 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
907 /* Fix phi expressions in the successor bb. */
908 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
912 /* Function vect_gen_prolog_loop_niters
914 Generate the number of iterations which should be peeled as prolog for the
915 loop represented by LOOP_VINFO. It is calculated as the misalignment of
916 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
917 As a result, after the execution of this loop, the data reference DR will
918 refer to an aligned location. The following computation is generated:
920 If the misalignment of DR is known at compile time:
921 addr_mis = int mis = DR_MISALIGNMENT (dr);
922 Else, compute address misalignment in bytes:
923 addr_mis = addr & (vectype_align - 1)
925 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
927 (elem_size = element type size; an element is the scalar element whose type
928 is the inner type of the vectype)
930 The computations will be emitted at the end of BB. We also compute and
931 store upper bound (included) of the result in BOUND.
933 When the step of the data-ref in the loop is not 1 (as in interleaved data
934 and SLP), the number of iterations of the prolog must be divided by the step
935 (which is equal to the size of interleaved group).
937 The above formulas assume that VF == number of elements in the vector. This
938 may not hold when there are multiple-types in the loop.
939 In this case, for some data-references in the loop the VF does not represent
940 the number of elements that fit in the vector. Therefore, instead of VF we
941 use TYPE_VECTOR_SUBPARTS. */
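/* Illustrative example (hypothetical values): for 4-byte elements, a
   4-element vector (VF = 4, vectype_align = 16) and a start address
   misaligned by addr_mis = 8 bytes, elem_misalign = 8 / 4 = 2 and
   prolog_niters = (4 - 2) & 3 = 2, i.e. two scalar iterations (step 1)
   bring the data reference to a 16-byte boundary.  */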
943 static tree
944 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
945 basic_block bb, int *bound)
947 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
948 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
949 tree var;
950 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
951 gimple_seq stmts = NULL, new_stmts = NULL;
952 tree iters, iters_name;
953 gimple *dr_stmt = DR_STMT (dr);
954 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
956 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
957 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
959 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
961 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "known peeling = %d.\n", npeel);
967 iters = build_int_cst (niters_type, npeel);
968 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
970 else
972 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
973 tree offset = negative
974 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
975 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
976 &stmts, offset, loop);
977 tree type = unsigned_type_for (TREE_TYPE (start_addr));
978 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
979 HOST_WIDE_INT elem_size =
980 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
981 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
982 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
983 tree nelements_tree = build_int_cst (type, nelements);
984 tree byte_misalign;
985 tree elem_misalign;
987 /* Create: byte_misalign = addr & (vectype_align - 1) */
988 byte_misalign =
989 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
990 vectype_align_minus_1);
992 /* Create: elem_misalign = byte_misalign / element_size */
993 elem_misalign =
994 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
996 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
997 if (negative)
998 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
999 else
1000 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1001 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1002 iters = fold_convert (niters_type, iters);
1003 *bound = nelements - 1;
1006 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "niters for prolog loop: ");
1010 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1011 dump_printf (MSG_NOTE, "\n");
1014 var = create_tmp_var (niters_type, "prolog_loop_niters");
1015 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1017 if (new_stmts)
1018 gimple_seq_add_seq (&stmts, new_stmts);
1019 if (stmts)
1021 gcc_assert (single_succ_p (bb));
1022 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1023 if (gsi_end_p (gsi))
1024 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1025 else
1026 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1028 return iters_name;
1032 /* Function vect_update_init_of_dr
1034 NITERS iterations were peeled from LOOP. DR represents a data reference
1035 in LOOP. This function updates the information recorded in DR to
1036 account for the fact that the first NITERS iterations had already been
1037 executed. Specifically, it updates the OFFSET field of DR. */
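/* Illustrative example (hypothetical values): for a data reference with
   DR_STEP 4 (consecutive 4-byte elements), peeling NITERS = 3 iterations
   adds 3 * 4 = 12 bytes to DR_OFFSET.  */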
1039 static void
1040 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1042 tree offset = DR_OFFSET (dr);
1044 niters = fold_build2 (MULT_EXPR, sizetype,
1045 fold_convert (sizetype, niters),
1046 fold_convert (sizetype, DR_STEP (dr)));
1047 offset = fold_build2 (PLUS_EXPR, sizetype,
1048 fold_convert (sizetype, offset), niters);
1049 DR_OFFSET (dr) = offset;
1053 /* Function vect_update_inits_of_drs
1055 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1056 This function updates the information recorded for the data references in
1057 the loop to account for the fact that the first NITERS iterations had
1058 already been executed. Specifically, it updates the initial_condition of
1059 the access_function of all the data_references in the loop. */
1061 static void
1062 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1064 unsigned int i;
1065 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1066 struct data_reference *dr;
1068 if (dump_enabled_p ())
1069 dump_printf_loc (MSG_NOTE, vect_location,
1070 "=== vect_update_inits_of_dr ===\n");
1072 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1073 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1075 gimple_seq seq;
1076 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1077 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1079 niters = fold_convert (sizetype, niters);
1080 niters = force_gimple_operand (niters, &seq, false, var);
1081 if (seq)
1083 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1084 gcc_assert (!new_bb);
1088 FOR_EACH_VEC_ELT (datarefs, i, dr)
1089 vect_update_init_of_dr (dr, niters);
1093 /* This function builds ni_name = number of iterations. Statements
1094 are emitted on the loop preheader edge. */
1096 tree
1097 vect_build_loop_niters (loop_vec_info loop_vinfo)
1099 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1100 if (TREE_CODE (ni) == INTEGER_CST)
1101 return ni;
1102 else
1104 tree ni_name, var;
1105 gimple_seq stmts = NULL;
1106 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1108 var = create_tmp_var (TREE_TYPE (ni), "niters");
1109 ni_name = force_gimple_operand (ni, &stmts, false, var);
1110 if (stmts)
1111 gsi_insert_seq_on_edge_immediate (pe, stmts);
1113 return ni_name;
1117 /* Calculate the number of iterations above which the vectorized loop will be
1118 preferred to the scalar loop. NITERS_PROLOG is the number of iterations
1119 of prolog loop. If it's integer const, the integer number is also passed
1120 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1121 number of iterations of prolog loop. VFM1 is vector factor minus one.
1122 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1123 (rather than vectorized) loop will be executed. This function stores
1124 upper bound (included) of the result in BOUND_SCALAR. */
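/* Illustrative example (hypothetical values): with VFM1 = 3, a constant
   prolog of INT_NITERS_PROLOG = 2 iterations and no profitability check,
   the scalar path runs at most 2 + 3 = 5 iterations, and with
   BOUND_PROLOG = 3 the stored *BOUND_SCALAR is 3 + 3 = 6.  */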
1126 static tree
1127 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1128 int bound_prolog, int vfm1, int th,
1129 int *bound_scalar, bool check_profitability)
1131 tree type = TREE_TYPE (niters_prolog);
1132 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1133 build_int_cst (type, vfm1));
1135 *bound_scalar = vfm1 + bound_prolog;
1136 if (check_profitability)
1138 /* TH indicates the minimum niters of vectorized loop, while we
1139 compute the maximum niters of scalar loop. */
1140 th--;
1141 /* Peeling for constant times. */
1142 if (int_niters_prolog >= 0)
1144 *bound_scalar = (int_niters_prolog + vfm1 < th
1145 ? th
1146 : vfm1 + int_niters_prolog);
1147 return build_int_cst (type, *bound_scalar);
1149 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1150 bound (included) of niters of the prolog loop. */
1151 if (th >= vfm1 + bound_prolog)
1153 *bound_scalar = th;
1154 return build_int_cst (type, th);
1156 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1157 else if (th > vfm1)
1158 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1160 return niters;
1163 /* This function generates the following statements:
1165 niters = number of iterations loop executes (after peeling)
1166 niters_vector = niters / vf
1168 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1169 true if NITERS doesn't overflow. */
1171 void
1172 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1173 tree *niters_vector_ptr, bool niters_no_overflow)
1175 tree ni_minus_gap, var;
1176 tree niters_vector;
1177 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1178 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1179 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1181 /* If epilogue loop is required because of data accesses with gaps, we
1182 subtract one iteration from the total number of iterations here for
1183 correct calculation of RATIO. */
1184 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1186 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1187 niters,
1188 build_one_cst (TREE_TYPE (niters)));
1189 if (!is_gimple_val (ni_minus_gap))
1191 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1192 gimple *stmts = NULL;
1193 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1194 true, var);
1195 gsi_insert_seq_on_edge_immediate (pe, stmts);
1198 else
1199 ni_minus_gap = niters;
1201 /* Create: niters >> log2(vf) */
1202 /* If it's known that niters == number of latch executions + 1 doesn't
1203 overflow, we can generate niters >> log2(vf); otherwise we generate
1204 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1205 will be at least one. */
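/* Illustrative example (hypothetical values): with VF = 4 (log_vf = 2)
   and NITERS = 10, the first form gives 10 >> 2 = 2 and the second form
   gives ((10 - 4) >> 2) + 1 = 2 as well; the second form is used when
   NITERS, computed as the latch count plus one, might wrap around.  */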
1206 if (niters_no_overflow)
1207 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208 ni_minus_gap, log_vf);
1209 else
1210 niters_vector
1211 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1212 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1213 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1214 ni_minus_gap,
1215 build_int_cst
1216 (TREE_TYPE (niters), vf)),
1217 log_vf),
1218 build_int_cst (TREE_TYPE (niters), 1));
1220 if (!is_gimple_val (niters_vector))
1222 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1223 gimple *stmts = NULL;
1224 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1225 gsi_insert_seq_on_edge_immediate (pe, stmts);
1227 *niters_vector_ptr = niters_vector;
1229 return;
1232 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1233 loop specified by LOOP_VINFO after vectorization, compute the number
1234 of iterations before vectorization (niters_vector * vf) and store it
1235 to NITERS_VECTOR_MULT_VF_PTR. */
1237 static void
1238 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1239 tree niters_vector,
1240 tree *niters_vector_mult_vf_ptr)
1242 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1244 tree type = TREE_TYPE (niters_vector);
1245 tree log_vf = build_int_cst (type, exact_log2 (vf));
1246 basic_block exit_bb = single_exit (loop)->dest;
1248 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1249 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1250 niters_vector, log_vf);
1251 if (!is_gimple_val (niters_vector_mult_vf))
1253 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1254 gimple_seq stmts = NULL;
1255 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1256 &stmts, true, var);
1257 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1258 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1260 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1263 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1264 from SECOND/FIRST and puts it at the original loop's preheader/exit
1265 edge; the two loops are arranged as below:
1267 preheader_a:
1268 first_loop:
1269 header_a:
1270 i_1 = PHI<i_0, i_2>;
1272 i_2 = i_1 + 1;
1273 if (cond_a)
1274 goto latch_a;
1275 else
1276 goto between_bb;
1277 latch_a:
1278 goto header_a;
1280 between_bb:
1281 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1283 second_loop:
1284 header_b:
1285 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1286 or with i_2 if no LCSSA phi is created
1287 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1289 i_4 = i_3 + 1;
1290 if (cond_b)
1291 goto latch_b;
1292 else
1293 goto exit_bb;
1294 latch_b:
1295 goto header_b;
1297 exit_bb:
1299 This function creates loop-closed SSA for the first loop; it updates the
1300 second loop's PHI nodes by replacing the argument on the incoming edge with
1301 the result of the newly created lcssa PHI node. If CREATE_LCSSA_FOR_IV_PHIS
1302 is false, loop-closed SSA phis will only be created for the non-IV phis of
1303 the first loop.
1305 This function assumes the exit bb of the first loop is the preheader bb of
1306 the second loop, i.e., between_bb in the example code. With PHIs updated,
1307 the second loop will execute the remaining iterations of the first. */
1309 static void
1310 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1311 struct loop *first, struct loop *second,
1312 bool create_lcssa_for_iv_phis)
1314 gphi_iterator gsi_update, gsi_orig;
1315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1317 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1318 edge second_preheader_e = loop_preheader_edge (second);
1319 basic_block between_bb = single_exit (first)->dest;
1321 gcc_assert (between_bb == second_preheader_e->src);
1322 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1323 /* Either the first loop or the second is the loop to be vectorized. */
1324 gcc_assert (loop == first || loop == second);
1326 for (gsi_orig = gsi_start_phis (first->header),
1327 gsi_update = gsi_start_phis (second->header);
1328 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1329 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1331 gphi *orig_phi = gsi_orig.phi ();
1332 gphi *update_phi = gsi_update.phi ();
1334 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1335 /* Generate lcssa PHI node for the first loop. */
1336 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1337 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1339 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1340 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1341 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1342 arg = new_res;
1345 /* Update PHI node in the second loop by replacing arg on the loop's
1346 incoming edge. */
1347 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1351 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1352 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1353 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1354 appear like below:
1356 guard_bb:
1357 if (cond)
1358 goto merge_bb;
1359 else
1360 goto skip_loop;
1362 skip_loop:
1363 header_a:
1364 i_1 = PHI<i_0, i_2>;
1366 i_2 = i_1 + 1;
1367 if (cond_a)
1368 goto latch_a;
1369 else
1370 goto exit_a;
1371 latch_a:
1372 goto header_a;
1374 exit_a:
1375 i_5 = PHI<i_2>;
1377 merge_bb:
1378 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1380 update_loop:
1381 header_b:
1382 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1384 i_4 = i_3 + 1;
1385 if (cond_b)
1386 goto latch_b;
1387 else
1388 goto exit_bb;
1389 latch_b:
1390 goto header_b;
1392 exit_bb:
1394 This function creates PHI nodes at merge_bb and replaces the use of i_5
1395 in the update_loop's PHI node with the result of the new PHI node. */
1397 static void
1398 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1399 struct loop *update_loop,
1400 edge guard_edge, edge merge_edge)
1402 source_location merge_loc, guard_loc;
1403 edge orig_e = loop_preheader_edge (skip_loop);
1404 edge update_e = loop_preheader_edge (update_loop);
1405 gphi_iterator gsi_orig, gsi_update;
1407 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1408 gsi_update = gsi_start_phis (update_loop->header));
1409 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1410 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1412 gphi *orig_phi = gsi_orig.phi ();
1413 gphi *update_phi = gsi_update.phi ();
1415 /* Generate new phi node at merge bb of the guard. */
1416 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1417 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1419 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1420 args in NEW_PHI for these edges. */
1421 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1422 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1423 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1424 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1425 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1426 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1428 /* Update phi in UPDATE_PHI. */
1429 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1433 /* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP.
1434 This function searches for the corresponding lcssa phi node in the exit
1435 bb of LOOP. If it is found, return the phi result; otherwise return
1436 NULL. */
1438 static tree
1439 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1440 gphi *lcssa_phi)
1442 gphi_iterator gsi;
1443 edge e = single_exit (loop);
1445 gcc_assert (single_pred_p (e->dest));
1446 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1448 gphi *phi = gsi.phi ();
1449 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1450 PHI_ARG_DEF (lcssa_phi, 0), 0))
1451 return PHI_RESULT (phi);
1453 return NULL_TREE;
1456 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1457 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1458 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1459 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1460 The CFG looks like:
1462 loop:
1463 header_a:
1464 i_1 = PHI<i_0, i_2>;
1466 i_2 = i_1 + 1;
1467 if (cond_a)
1468 goto latch_a;
1469 else
1470 goto exit_a;
1471 latch_a:
1472 goto header_a;
1474 exit_a:
1476 guard_bb:
1477 if (cond)
1478 goto merge_bb;
1479 else
1480 goto epilog_loop;
1482 ;; fall_through_bb
1484 epilog_loop:
1485 header_b:
1486 i_3 = PHI<i_2, i_4>;
1488 i_4 = i_3 + 1;
1489 if (cond_b)
1490 goto latch_b;
1491 else
1492 goto merge_bb;
1493 latch_b:
1494 goto header_b;
1496 merge_bb:
1497 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1499 exit_bb:
1500 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1502 For each name used outside EPILOG (i.e. for each name that has an lcssa
1503 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1504 args corresponding to GUARD_EDGE and MERGE_EDGE. The arg for MERGE_EDGE is
1505 the arg of the original PHI in exit_bb; the arg for GUARD_EDGE is defined
1506 by LOOP and is found in the exit bb of LOOP. The arg of the original PHI
1507 in exit_bb will also be updated. */
1509 static void
1510 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1511 edge guard_edge, edge merge_edge)
1513 gphi_iterator gsi;
1514 basic_block merge_bb = guard_edge->dest;
1516 gcc_assert (single_succ_p (merge_bb));
1517 edge e = single_succ_edge (merge_bb);
1518 basic_block exit_bb = e->dest;
1519 gcc_assert (single_pred_p (exit_bb));
1520 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1522 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1524 gphi *update_phi = gsi.phi ();
1525 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1526 /* This loop-closed-phi actually doesn't represent a use out of the
1527 loop - the phi arg is a constant. */
1528 if (TREE_CODE (old_arg) != SSA_NAME)
1529 continue;
1531 tree merge_arg = get_current_def (old_arg);
1532 if (!merge_arg)
1533 merge_arg = old_arg;
1535 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1536 /* If the var is live after loop but not a reduction, we simply
1537 use the old arg. */
1538 if (!guard_arg)
1539 guard_arg = old_arg;
1541 /* Create new phi node in MERGE_BB: */
1542 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1543 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1545 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1546 the two PHI args in merge_phi for these edges. */
1547 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1548 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1550 /* Update the original phi in exit_bb. */
1551 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1555 /* The EPILOG loop is duplicated from the original loop for vectorizing;
1556 the arg of its loop-closed SSA PHI needs to be updated. */
1558 static void
1559 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1561 gphi_iterator gsi;
1562 basic_block exit_bb = single_exit (epilog)->dest;
1564 gcc_assert (single_pred_p (exit_bb));
1565 edge e = EDGE_PRED (exit_bb, 0);
1566 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1567 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1570 /* Function vect_do_peeling.
1572 Input:
1573 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1575 preheader:
1576 LOOP:
1577 header_bb:
1578 loop_body
1579 if (exit_loop_cond) goto exit_bb
1580 else goto header_bb
1581 exit_bb:
1583 - NITERS: The number of iterations of the loop.
1584 - NITERSM1: The number of iterations of the loop's latch.
1585 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1586 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1587 CHECK_PROFITABILITY is true.
1588 Output:
1589 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1591 This function peels prolog and epilog from the loop, adds guards skipping
1592 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1593 would look like:
1595 guard_bb_1:
1596 if (prefer_scalar_loop) goto merge_bb_1
1597 else goto guard_bb_2
1599 guard_bb_2:
1600 if (skip_prolog) goto merge_bb_2
1601 else goto prolog_preheader
1603 prolog_preheader:
1604 PROLOG:
1605 prolog_header_bb:
1606 prolog_body
1607 if (exit_prolog_cond) goto prolog_exit_bb
1608 else goto prolog_header_bb
1609 prolog_exit_bb:
1611 merge_bb_2:
1613 vector_preheader:
1614 VECTOR LOOP:
1615 vector_header_bb:
1616 vector_body
1617 if (exit_vector_cond) goto vector_exit_bb
1618 else goto vector_header_bb
1619 vector_exit_bb:
1621 guard_bb_3:
1622 if (skip_epilog) goto merge_bb_3
1623 else goto epilog_preheader
1625 merge_bb_1:
1627 epilog_preheader:
1628 EPILOG:
1629 epilog_header_bb:
1630 epilog_body
1631 if (exit_epilog_cond) goto merge_bb_3
1632 else goto epilog_header_bb
1634 merge_bb_3:
1636 Note this function peels the prolog and epilog, and adds the guards,
1637 only when necessary.
1638 Returns the created epilogue or NULL.
1640 TODO: Guard for prefer_scalar_loop should be emitted along with
1641 versioning conditions if loop versioning is needed. */
1644 struct loop *
1645 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1646 tree *niters_vector, int th, bool check_profitability,
1647 bool niters_no_overflow)
1649 edge e, guard_e;
1650 tree type = TREE_TYPE (niters), guard_cond;
1651 basic_block guard_bb, guard_to;
1652 int prob_prolog, prob_vector, prob_epilog;
1653 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1654 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1655 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1656 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1657 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1659 if (!prolog_peeling && !epilog_peeling)
1660 return NULL;
1662 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1663 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1664 vf = 3;
1665 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1666 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1668 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1669 struct loop *first_loop = loop;
1670 create_lcssa_for_virtual_phi (loop);
1671 update_ssa (TODO_update_ssa_only_virtuals);
1673 if (MAY_HAVE_DEBUG_STMTS)
1675 gcc_assert (!adjust_vec.exists ());
1676 adjust_vec.create (32);
1678 initialize_original_copy_tables ();
1680 /* Prolog loop may be skipped. */
1681 bool skip_prolog = (prolog_peeling != 0);
1682 /* Skip to epilog if scalar loop may be preferred. It's only used when
1683 we peel for epilog loop. */
1684 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685 /* Epilog loop must be executed if the number of iterations for epilog
1686 loop is known at compile time, otherwise we need to add a check at
1687 the end of vector loop and skip to the end of epilog loop. */
1688 bool skip_epilog = (prolog_peeling < 0
1689 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1690 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1691 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1692 skip_epilog = false;
1694 /* Record the anchor bb at which guard should be placed if scalar loop
1695 may be preferred. */
1696 basic_block anchor = loop_preheader_edge (loop)->src;
1697 if (skip_vector)
1699 split_edge (loop_preheader_edge (loop));
1701 /* Due to the order in which we peel prolog and epilog, we first
1702 propagate probability to the whole loop. The purpose is to
1703 avoid adjusting probabilities of both prolog and vector loops
1704 separately. Note in this case, the probability of epilog loop
1705 needs to be scaled back later. */
1706 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1707 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1708 REG_BR_PROB_BASE);
1709 scale_loop_profile (loop, prob_vector, bound);
1712 tree niters_prolog = build_int_cst (type, 0);
1713 source_location loop_loc = find_loop_location (loop);
1714 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1715 if (prolog_peeling)
1717 e = loop_preheader_edge (loop);
1718 if (!slpeel_can_duplicate_loop_p (loop, e))
1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1721 "loop can't be duplicated to preheader edge.\n");
1722 gcc_unreachable ();
1724 /* Peel prolog and put it on preheader edge of loop. */
1725 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1726 if (!prolog)
1728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1729 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1730 gcc_unreachable ();
1732 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1733 first_loop = prolog;
1734 reset_original_copy_tables ();
1736 /* Generate and update the number of iterations for prolog loop. */
1737 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1738 &bound_prolog);
1739 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1741 /* Skip the prolog loop. */
1742 if (skip_prolog)
1744 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1745 niters_prolog, build_int_cst (type, 0));
1746 guard_bb = loop_preheader_edge (prolog)->src;
1747 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1748 guard_to = split_edge (loop_preheader_edge (loop));
1749 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1750 guard_to, guard_bb,
1751 inverse_probability (prob_prolog));
1752 e = EDGE_PRED (guard_to, 0);
1753 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1754 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1756 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1757 REG_BR_PROB_BASE);
1758 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1760 /* Update init address of DRs. */
1761 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1762 /* Update niters for vector loop. */
1763 LOOP_VINFO_NITERS (loop_vinfo)
1764 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1765 LOOP_VINFO_NITERSM1 (loop_vinfo)
1766 = fold_build2 (MINUS_EXPR, type,
1767 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1768 niters = vect_build_loop_niters (loop_vinfo);
1770 /* Prolog iterates at most bound_prolog times, latch iterates at
1771 most bound_prolog - 1 times. */
1772 record_niter_bound (prolog, bound_prolog - 1, false, true);
1773 delete_update_ssa ();
1774 adjust_vec_debug_stmts ();
1775 scev_reset ();
1778 if (epilog_peeling)
1780 e = single_exit (loop);
1781 if (!slpeel_can_duplicate_loop_p (loop, e))
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1784 "loop can't be duplicated to exit edge.\n");
1785 gcc_unreachable ();
1787 /* Peel epilog and put it on exit edge of loop. */
1788 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1789 if (!epilog)
1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1792 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1793 gcc_unreachable ();
1795 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1797 /* The scalar version of the loop may be preferred.  In this case, add
1798 a guard and skip to the epilog.  Note this only happens when the number
1799 of iterations of the loop is unknown at compile time, otherwise the
1800 loop won't be vectorized. */
1801 if (skip_vector)
1803 /* An additional epilogue iteration is peeled if a gap exists. */
1804 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1805 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1806 bound_prolog,
1807 peel_for_gaps ? vf : vf - 1,
1808 th, &bound_scalar,
1809 check_profitability);
1810 /* Build guard against NITERSM1 since NITERS may overflow. */
1811 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1812 guard_bb = anchor;
1813 guard_to = split_edge (loop_preheader_edge (epilog));
1814 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1815 guard_to, guard_bb,
1816 inverse_probability (prob_vector));
1817 e = EDGE_PRED (guard_to, 0);
1818 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1819 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1821 /* Simply propagate profile info from guard_bb to guard_to which is
1822 a merge point of control flow. */
1823 guard_to->frequency = guard_bb->frequency;
1824 guard_to->count = guard_bb->count;
1825 single_succ_edge (guard_to)->count = guard_to->count;
1826 /* Scale probability of epilog loop back. */
1827 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1828 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
1831 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1832 tree niters_vector_mult_vf;
1833 /* If the loop is peeled a non-zero constant number of times, NITERS now
1834 refers to orig_niters - prolog_peeling; it won't overflow even if
1835 orig_niters overflows. */
1836 niters_no_overflow |= (prolog_peeling > 0);
1837 vect_gen_vector_loop_niters (loop_vinfo, niters,
1838 niters_vector, niters_no_overflow);
1839 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1840 &niters_vector_mult_vf);
1841 /* Update IVs of original loop as if they were advanced by
1842 niters_vector_mult_vf steps. */
1843 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1844 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1845 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1846 update_e);
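   /* Illustrative note (an assumption, not from the original source): for a
      simple induction variable such as i starting at 0 with step 1, the
      value seen by the epilog loop on UPDATE_E becomes niters_vector_mult_vf,
      i.e. the first iteration the vector loop did not handle.  */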
1848 if (skip_epilog)
1850 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1851 niters, niters_vector_mult_vf);
1852 guard_bb = single_exit (loop)->dest;
1853 guard_to = split_edge (single_exit (epilog));
1854 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1855 skip_vector ? anchor : guard_bb,
1856 inverse_probability (prob_epilog));
1857 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1858 single_exit (epilog));
1859 /* We only need to handle the basic block before the epilog loop if it's
1860 not the guard_bb, which is the case when skip_vector is true. */
1861 if (guard_bb != bb_before_epilog)
1863 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1864 + inverse_probability (prob_vector));
1866 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1867 REG_BR_PROB_BASE);
1869 scale_loop_profile (epilog, prob_epilog, bound);
1871 else
1872 slpeel_update_phi_nodes_for_lcssa (epilog);
1874 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1875 /* We share epilog loop with scalar version loop. */
1876 bound = MAX (bound, bound_scalar - 1);
1877 record_niter_bound (epilog, bound, false, true);
1879 delete_update_ssa ();
1880 adjust_vec_debug_stmts ();
1881 scev_reset ();
1883 adjust_vec.release ();
1884 free_original_copy_tables ();
1886 return epilog;
1889 /* Function vect_create_cond_for_niters_checks.
1891 Create a conditional expression that represents the run-time checks for
1892 loop's niter.  The loop is guaranteed to terminate if the run-time
1893 checks hold.
1895 Input:
1896 COND_EXPR - input conditional expression.  New conditions will be chained
1897 with logical AND operation.  If it is NULL, then the niter
1898 assumptions become the condition.
1899 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
1900 under which the niter information is valid.
1902 Output:
1903 COND_EXPR - conditional expression.
1905 The returned COND_EXPR is the conditional expression to be used in the
1906 if statement that controls which version of the loop gets executed at
1907 runtime. */
1909 static void
1910 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1912 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1914 if (*cond_expr)
1915 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1916 *cond_expr, part_cond_expr);
1917 else
1918 *cond_expr = part_cond_expr;
1921 /* Function vect_create_cond_for_align_checks.
1923 Create a conditional expression that represents the alignment checks for
1924 all of the data references (array element references) whose alignment must be
1925 checked at runtime.
1927 Input:
1928 COND_EXPR - input conditional expression. New conditions will be chained
1929 with logical AND operation.
1930 LOOP_VINFO - two fields of the loop information are used.
1931 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1932 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1934 Output:
1935 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1936 expression.
1937 The returned value is the conditional expression to be used in the if
1938 statement that controls which version of the loop gets executed at runtime.
1940 The algorithm makes two assumptions:
1941 1) The number of bytes "n" in a vector is a power of 2.
1942 2) An address "a" is aligned if a%n is zero, and this
1943 test can be done as a&(n-1) == 0.  For example, for 16
1944 byte vectors the test is a&0xf == 0. */
1946 static void
1947 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1948 tree *cond_expr,
1949 gimple_seq *cond_expr_stmt_list)
1951 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1952 vec<gimple *> may_misalign_stmts
1953 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1954 gimple *ref_stmt;
1955 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1956 tree mask_cst;
1957 unsigned int i;
1958 tree int_ptrsize_type;
1959 char tmp_name[20];
1960 tree or_tmp_name = NULL_TREE;
1961 tree and_tmp_name;
1962 gimple *and_stmt;
1963 tree ptrsize_zero;
1964 tree part_cond_expr;
1966 /* Check that mask is one less than a power of 2, i.e., mask is
1967 all zeros followed by all ones. */
1968 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1970 int_ptrsize_type = signed_type_for (ptr_type_node);
1972 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1973 of the first vector of the i'th data reference. */
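  /* Illustrative example (not part of the original comment): with two data
     references at addresses A1 and A2 and 16-byte vectors (mask == 0xf),
     the statements built below conceptually compute

       or_tmp = (signed intptr) A1 | (signed intptr) A2;
       and_tmp = or_tmp & 0xf;

     and the final condition tests and_tmp == 0.  */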
1975 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1977 gimple_seq new_stmt_list = NULL;
1978 tree addr_base;
1979 tree addr_tmp_name;
1980 tree new_or_tmp_name;
1981 gimple *addr_stmt, *or_stmt;
1982 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1983 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1984 bool negative = tree_int_cst_compare
1985 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1986 tree offset = negative
1987 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1989 /* create: addr_tmp = (int)(address_of_first_vector) */
1990 addr_base =
1991 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1992 offset, loop);
1993 if (new_stmt_list != NULL)
1994 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1996 sprintf (tmp_name, "addr2int%d", i);
1997 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1998 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1999 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2001 /* The addresses are ORed together. */
2003 if (or_tmp_name != NULL_TREE)
2005 /* create: or_tmp = or_tmp | addr_tmp */
2006 sprintf (tmp_name, "orptrs%d", i);
2007 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2008 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2009 or_tmp_name, addr_tmp_name);
2010 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2011 or_tmp_name = new_or_tmp_name;
2013 else
2014 or_tmp_name = addr_tmp_name;
2016 } /* end for i */
2018 mask_cst = build_int_cst (int_ptrsize_type, mask);
2020 /* create: and_tmp = or_tmp & mask */
2021 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2023 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2024 or_tmp_name, mask_cst);
2025 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2027 /* Make and_tmp the left operand of the conditional test against zero.
2028 If and_tmp has a nonzero bit then some address is unaligned. */
2029 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2030 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2031 and_tmp_name, ptrsize_zero);
2032 if (*cond_expr)
2033 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2034 *cond_expr, part_cond_expr);
2035 else
2036 *cond_expr = part_cond_expr;
2039 /* Given two data references and segment lengths described by DR_A and DR_B,
2040 create an expression checking whether the two address ranges intersect
2041 with each other based on the index of the two addresses.  This can only
2042 be done if DR_A and DR_B refer to the same (array) object and the index is
2043 the only difference. For example:
2045 DR_A DR_B
2046 data-ref arr[i] arr[j]
2047 base_object arr arr
2048 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2050 The addresses and their index are like:
2052 |<- ADDR_A ->| |<- ADDR_B ->|
2053 ------------------------------------------------------->
2054 | | | | | | | | | |
2055 ------------------------------------------------------->
2056 i_0 ... i_0+4 j_0 ... j_0+4
2058 We can create an expression based on the index rather than the address:
2060 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2062 Note the evolution step of the index needs to be considered in the comparison. */
2064 static bool
2065 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2066 const dr_with_seg_len& dr_a,
2067 const dr_with_seg_len& dr_b)
2069 if (integer_zerop (DR_STEP (dr_a.dr))
2070 || integer_zerop (DR_STEP (dr_b.dr))
2071 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2072 return false;
2074 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2075 return false;
2077 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2078 return false;
2080 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2081 return false;
2083 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2084 return false;
2086 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2088 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2089 unsigned HOST_WIDE_INT abs_step
2090 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2092 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2093 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2094 /* Infer the number of iterations with which the memory segment is accessed
2095 by DR.  In other words, an alias is checked if the memory segment accessed
2096 by DR_A in some iterations intersects with the memory segment accessed by
2097 DR_B in the same number of iterations.
2098 Note the segment length is a linear function of the number of iterations
2099 with DR_STEP as the coefficient. */
2100 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2101 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
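  /* Worked example (illustrative, not from the original source): with
     seg_len1 == 16 bytes and abs_step == 4 bytes this gives
     niter_len1 == (16 + 4 - 1) / 4 == 4 iterations.  */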
2103 unsigned int i;
2104 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2105 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2107 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2108 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2109 /* Two indices must be the same if they are not scev, or not scev wrt
2110 the current loop being vectorized. */
2111 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2112 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2113 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2114 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2116 if (operand_equal_p (access1, access2, 0))
2117 continue;
2119 return false;
2121 /* The two indices must have the same step. */
2122 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2123 return false;
2125 tree idx_step = CHREC_RIGHT (access1);
2126 /* Index must have const step, otherwise DR_STEP won't be constant. */
2127 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2128 /* Index must evaluate in the same direction as DR. */
2129 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2131 tree min1 = CHREC_LEFT (access1);
2132 tree min2 = CHREC_LEFT (access2);
2133 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2134 return false;
2136 /* Ideally, aliasing could be checked against the loop's control IV, but
2137 we would need to prove a linear mapping between the control IV and the
2138 reference index.  Although that should be true, we check against the
2139 (array) index of the data reference instead.  Like the segment length,
2140 the index length is a linear function of the number of iterations with
2141 idx_step as the coefficient, i.e., niter_len * idx_step. */
2142 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2143 build_int_cst (TREE_TYPE (min1),
2144 niter_len1));
2145 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2146 build_int_cst (TREE_TYPE (min2),
2147 niter_len2));
2148 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2149 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2150 /* Adjust ranges for negative step. */
2151 if (neg_step)
2153 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2154 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2155 CHREC_LEFT (access1), idx_step);
2156 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2157 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2158 CHREC_LEFT (access2), idx_step);
2160 tree part_cond_expr
2161 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2162 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2163 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2164 if (*cond_expr)
2165 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2166 *cond_expr, part_cond_expr);
2167 else
2168 *cond_expr = part_cond_expr;
2170 return true;
2173 /* Given two data references and segment lengths described by DR_A and DR_B,
2174 create an expression checking whether the two address ranges intersect
2175 with each other:
2177 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2178 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
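/* For instance (an illustrative example, not part of the original comment):
   if DR_A covers [A, A + 16) and DR_B covers [B, B + 16), both with positive
   step, the check built below is conceptually
   (A + 16 <= B) || (B + 16 <= A).  */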
2180 static void
2181 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2182 const dr_with_seg_len& dr_a,
2183 const dr_with_seg_len& dr_b)
2185 *cond_expr = NULL_TREE;
2186 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2187 return;
2189 tree segment_length_a = dr_a.seg_len;
2190 tree segment_length_b = dr_b.seg_len;
2191 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2192 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2193 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2195 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2196 offset_a, DR_INIT (dr_a.dr));
2197 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2198 offset_b, DR_INIT (dr_b.dr));
2199 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2200 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2202 tree seg_a_min = addr_base_a;
2203 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2204 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2205 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2206 [a, a+12) */
2207 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2209 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2210 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2211 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2214 tree seg_b_min = addr_base_b;
2215 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2216 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2218 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2219 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2220 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2222 *cond_expr
2223 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2224 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2225 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2228 /* Function vect_create_cond_for_alias_checks.
2230 Create a conditional expression that represents the run-time checks for
2231 overlapping of address ranges represented by a list of data reference
2232 relations passed as input.
2234 Input:
2235 COND_EXPR - input conditional expression.  New conditions will be chained
2236 with logical AND operation.  If it is NULL, then the alias
2237 checks become the condition.
2238 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2239 to be checked.
2241 Output:
2242 COND_EXPR - conditional expression.
2244 The returned COND_EXPR is the conditional expression to be used in the if
2245 statement that controls which version of the loop gets executed at runtime.
2248 void
2249 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2251 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2252 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2253 tree part_cond_expr;
2255 if (comp_alias_ddrs.is_empty ())
2256 return;
2258 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2260 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2261 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2263 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE, vect_location,
2266 "create runtime check for data references ");
2267 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2268 dump_printf (MSG_NOTE, " and ");
2269 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2270 dump_printf (MSG_NOTE, "\n");
2273 /* Create a condition expression for each pair of data references. */
2274 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2275 if (*cond_expr)
2276 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2277 *cond_expr, part_cond_expr);
2278 else
2279 *cond_expr = part_cond_expr;
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_NOTE, vect_location,
2284 "created %u versioning for alias checks.\n",
2285 comp_alias_ddrs.length ());
2289 /* Function vect_loop_versioning.
2291 If the loop has data references that may or may not be aligned and/or
2292 has data reference relations whose independence was not proven, then
2293 two versions of the loop need to be generated, one which is vectorized
2294 and one which isn't. A test is then generated to control which of the
2295 loops is executed. The test checks for the alignment of all of the
2296 data references that may or may not be aligned. An additional
2297 sequence of runtime tests is generated for each pair of DDRs whose
2298 independence was not proven.  The vectorized version of the loop is
2299 executed only if both alias and alignment tests are passed.
2301 The test generated to check which version of loop is executed
2302 is modified to also check for profitability as indicated by the
2303 cost model threshold TH.
2305 The versioning precondition(s) are placed in *COND_EXPR and
2306 *COND_EXPR_STMT_LIST. */
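/* Illustrative sketch (not part of the original comment): with all checks
   enabled, the condition guarding the vectorized version is conceptually

     scalar_loop_iters > TH
     && niter_assumptions && addresses_aligned && !ranges_may_overlap

   assembled by the vect_create_cond_for_* helpers above and gimplified
   before loop_version is called.  */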
2308 void
2309 vect_loop_versioning (loop_vec_info loop_vinfo,
2310 unsigned int th, bool check_profitability)
2312 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2313 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2314 basic_block condition_bb;
2315 gphi_iterator gsi;
2316 gimple_stmt_iterator cond_exp_gsi;
2317 basic_block merge_bb;
2318 basic_block new_exit_bb;
2319 edge new_exit_e, e;
2320 gphi *orig_phi, *new_phi;
2321 tree cond_expr = NULL_TREE;
2322 gimple_seq cond_expr_stmt_list = NULL;
2323 tree arg;
2324 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2325 gimple_seq gimplify_stmt_list = NULL;
2326 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2327 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2328 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2329 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2331 if (check_profitability)
2332 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2333 build_int_cst (TREE_TYPE (scalar_loop_iters),
2334 th));
2336 if (version_niter)
2337 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2339 if (cond_expr)
2340 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2341 is_gimple_condexpr, NULL_TREE);
2343 if (version_align)
2344 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2345 &cond_expr_stmt_list);
2347 if (version_alias)
2348 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2350 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2351 is_gimple_condexpr, NULL_TREE);
2352 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2354 initialize_original_copy_tables ();
2355 if (scalar_loop)
2357 edge scalar_e;
2358 basic_block preheader, scalar_preheader;
2360 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2361 scale LOOP's frequencies instead. */
2362 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2363 prob, REG_BR_PROB_BASE - prob,
2364 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2365 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2366 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2367 while we need to move it above LOOP's preheader. */
2368 e = loop_preheader_edge (loop);
2369 scalar_e = loop_preheader_edge (scalar_loop);
2370 gcc_assert (empty_block_p (e->src)
2371 && single_pred_p (e->src));
2372 gcc_assert (empty_block_p (scalar_e->src)
2373 && single_pred_p (scalar_e->src));
2374 gcc_assert (single_pred_p (condition_bb));
2375 preheader = e->src;
2376 scalar_preheader = scalar_e->src;
2377 scalar_e = find_edge (condition_bb, scalar_preheader);
2378 e = single_pred_edge (preheader);
2379 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2380 scalar_preheader);
2381 redirect_edge_and_branch_force (scalar_e, preheader);
2382 redirect_edge_and_branch_force (e, condition_bb);
2383 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2384 single_pred (condition_bb));
2385 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2386 single_pred (scalar_preheader));
2387 set_immediate_dominator (CDI_DOMINATORS, preheader,
2388 condition_bb);
2390 else
2391 nloop = loop_version (loop, cond_expr, &condition_bb,
2392 prob, REG_BR_PROB_BASE - prob,
2393 prob, REG_BR_PROB_BASE - prob, true);
2395 if (version_niter)
2397 /* The versioned loop could be infinite; we need to clear the existing
2398 niter information, which was copied from the original loop. */
2399 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2400 vect_free_loop_info_assumptions (nloop);
2401 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2402 loop_constraint_set (loop, LOOP_C_INFINITE);
2405 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2406 && dump_enabled_p ())
2408 if (version_alias)
2409 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2410 "loop versioned for vectorization because of "
2411 "possible aliasing\n");
2412 if (version_align)
2413 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2414 "loop versioned for vectorization to enhance "
2415 "alignment\n");
2418 free_original_copy_tables ();
2420 /* Loop versioning violates an assumption we try to maintain during
2421 vectorization - that the loop exit block has a single predecessor.
2422 After versioning, the exit block of both loop versions is the same
2423 basic block (i.e. it has two predecessors). Just in order to simplify
2424 following transformations in the vectorizer, we fix this situation
2425 here by adding a new (empty) block on the exit-edge of the loop,
2426 with the proper loop-exit phis to maintain loop-closed-form.
2427 If loop versioning wasn't done from loop but from scalar_loop instead,
2428 merge_bb will already have just a single predecessor. */
2430 merge_bb = single_exit (loop)->dest;
2431 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2433 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2434 new_exit_bb = split_edge (single_exit (loop));
2435 new_exit_e = single_exit (loop);
2436 e = EDGE_SUCC (new_exit_bb, 0);
2438 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2440 tree new_res;
2441 orig_phi = gsi.phi ();
2442 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2443 new_phi = create_phi_node (new_res, new_exit_bb);
2444 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2445 add_phi_arg (new_phi, arg, new_exit_e,
2446 gimple_phi_arg_location_from_edge (orig_phi, e));
2447 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2451 /* End loop-exit-fixes after versioning. */
2453 if (cond_expr_stmt_list)
2455 cond_exp_gsi = gsi_last_bb (condition_bb);
2456 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2457 GSI_SAME_STMT);
2459 update_ssa (TODO_update_ssa);