gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use, using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
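/* Editorial illustration (not part of the original sources): for NITERS = n
   and an exit taken on the true edge, the rewritten loop tests a fresh
   counter instead of the original condition, roughly:

       i_before = PHI <0 (preheader), i_after (latch)>
       ...                          ;; loop body
       i_after = i_before + 1
       if (i_after >= n)            ;; "i_after < n" when the exit is the
         goto exit;                 ;; false edge
       else
         goto latch;                                                        */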
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For each pair of PHI arguments in FROM->dest and TO->dest taken from
312 those edges, ensure that the current_def of the TO->dest argument is
313 set to that of the corresponding FROM->dest argument. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
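/* Editorial note (not from the original sources): when E is LOOP's exit edge
   the copy is placed after LOOP and acts as an epilogue; when E is the
   preheader edge the copy is placed before LOOP and acts as a prologue.  In
   both cases the copied loop gets its own single-entry preheader, which is
   what keeps the PHI-argument bookkeeping below simple.  */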
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. */
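/* Editorial sketch (illustrative only): after this call GUARD_BB ends with

       if (COND)    ;; new edge, EDGE_TRUE_VALUE, probability PROBABILITY
         goto GUARD_TO;
       else         ;; old fall-through edge, now EDGE_FALSE_VALUE
         <continue into the loop as before>;                                */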
539 static edge
540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
541 basic_block guard_to, basic_block dom_bb,
542 int probability)
544 gimple_stmt_iterator gsi;
545 edge new_e, enter_e;
546 gcond *cond_stmt;
547 gimple_seq gimplify_stmt_list = NULL;
549 enter_e = EDGE_SUCC (guard_bb, 0);
550 enter_e->flags &= ~EDGE_FALLTHRU;
551 enter_e->flags |= EDGE_FALSE_VALUE;
552 gsi = gsi_last_bb (guard_bb);
554 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
555 NULL_TREE);
556 if (gimplify_stmt_list)
557 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
559 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
560 gsi = gsi_last_bb (guard_bb);
561 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563 /* Add new edge to connect guard block to the merge/loop-exit block. */
564 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
566 new_e->count = guard_bb->count;
567 new_e->probability = probability;
568 new_e->count = apply_probability (enter_e->count, probability);
569 enter_e->count -= new_e->count;
570 enter_e->probability = inverse_probability (probability);
571 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
572 return new_e;
576 /* This function verifies that the following restrictions apply to LOOP:
577 (1) it consists of exactly 2 basic blocks (header and an empty latch)
578 for an innermost loop, or of 5 basic blocks for an outer loop;
579 (2) it is single entry, single exit;
580 (3) its exit condition is the last stmt in the header;
581 (4) E is the entry/exit edge of LOOP. */
584 bool
585 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
587 edge exit_e = single_exit (loop);
588 edge entry_e = loop_preheader_edge (loop);
589 gcond *orig_cond = get_loop_exit_condition (loop);
590 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
591 unsigned int num_bb = loop->inner? 5 : 2;
593 /* All loops have an outer scope; the only case loop->outer is NULL is for
594 the function itself. */
595 if (!loop_outer (loop)
596 || loop->num_nodes != num_bb
597 || !empty_block_p (loop->latch)
598 || !single_exit (loop)
599 /* Verify that new loop exit condition can be trivially modified. */
600 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
601 || (e != exit_e && e != entry_e))
602 return false;
604 return true;
607 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
608 in the exit bb and rename all the uses after the loop. This simplifies
609 the *guard[12] routines, which assume loop closed SSA form for all PHIs
610 (but normally loop closed SSA form doesn't require virtual PHIs to be
611 in the same form). Doing this early simplifies the checking what
612 uses should be renamed. */
614 static void
615 create_lcssa_for_virtual_phi (struct loop *loop)
617 gphi_iterator gsi;
618 edge exit_e = single_exit (loop);
620 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
621 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
623 gphi *phi = gsi.phi ();
624 for (gsi = gsi_start_phis (exit_e->dest);
625 !gsi_end_p (gsi); gsi_next (&gsi))
626 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
627 break;
628 if (gsi_end_p (gsi))
630 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
631 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
632 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
633 imm_use_iterator imm_iter;
634 gimple *stmt;
635 use_operand_p use_p;
637 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
638 gimple_phi_set_result (new_phi, new_vop);
639 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
640 if (stmt != new_phi
641 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
642 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
643 SET_USE (use_p, new_vop);
645 break;
650 /* Function find_loop_location.
652 Extract the location of the loop in the source code.
653 If the loop is not well formed for vectorization, an estimated
654 location is calculated.
655 Return the loop location if found, and UNKNOWN_LOCATION otherwise. */
657 source_location
658 find_loop_location (struct loop *loop)
660 gimple *stmt = NULL;
661 basic_block bb;
662 gimple_stmt_iterator si;
664 if (!loop)
665 return UNKNOWN_LOCATION;
667 stmt = get_loop_exit_condition (loop);
669 if (stmt
670 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
671 return gimple_location (stmt);
673 /* If we got here the loop is probably not "well formed",
674 try to estimate the loop location */
676 if (!loop->header)
677 return UNKNOWN_LOCATION;
679 bb = loop->header;
681 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
683 stmt = gsi_stmt (si);
684 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
685 return gimple_location (stmt);
688 return UNKNOWN_LOCATION;
691 /* Return true if PHI defines an IV of the loop to be vectorized. */
693 static bool
694 iv_phi_p (gphi *phi)
696 if (virtual_operand_p (PHI_RESULT (phi)))
697 return false;
699 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
700 gcc_assert (stmt_info != NULL);
701 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
703 return false;
705 return true;
708 /* Function vect_can_advance_ivs_p
710 In case the number of iterations that LOOP iterates is unknown at compile
711 time, an epilog loop will be generated, and the loop induction variables
712 (IVs) will be "advanced" to the value they are supposed to take just before
713 the epilog loop. Here we check that the access function of the loop IVs
714 and the expression that represents the loop bound are simple enough.
715 These restrictions will be relaxed in the future. */
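/* Editorial example (not part of the original sources): an IV updated as
   "i = i + step" with STEP invariant in LOOP has evolution part "step" and
   can later be advanced directly to "init + niters * step"; an IV whose step
   itself changes inside the loop (a chrec of degree >= 2) fails the checks
   below.  */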
717 bool
718 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
721 basic_block bb = loop->header;
722 gphi_iterator gsi;
724 /* Analyze phi functions of the loop header. */
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
728 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
730 tree evolution_part;
732 gphi *phi = gsi.phi ();
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
736 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
739 /* Skip virtual phi's. The data dependences that are associated with
740 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
742 Skip reduction phis. */
743 if (!iv_phi_p (phi))
745 if (dump_enabled_p ())
746 dump_printf_loc (MSG_NOTE, vect_location,
747 "reduc or virtual phi. skip.\n");
748 continue;
751 /* Analyze the evolution function. */
753 evolution_part
754 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
755 if (evolution_part == NULL_TREE)
757 if (dump_enabled_p ())
758 dump_printf (MSG_MISSED_OPTIMIZATION,
759 "No access function or evolution.\n");
760 return false;
763 /* FORNOW: We do not transform initial conditions of IVs
764 which evolution functions are not invariants in the loop. */
766 if (!expr_invariant_in_loop_p (loop, evolution_part))
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "evolution not invariant in loop.\n");
771 return false;
774 /* FORNOW: We do not transform initial conditions of IVs
775 which evolution functions are a polynomial of degree >= 2. */
777 if (tree_is_chrec (evolution_part))
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
781 "evolution is chrec.\n");
782 return false;
786 return true;
790 /* Function vect_update_ivs_after_vectorizer.
792 "Advance" the induction variables of LOOP to the value they should take
793 after the execution of LOOP. This is currently necessary because the
794 vectorizer does not handle induction variables that are used after the
795 loop. Such a situation occurs when the last iterations of LOOP are
796 peeled, because:
797 1. We introduced new uses after LOOP for IVs that were not originally used
798 after LOOP: the IVs of LOOP are now used by an epilog loop.
799 2. LOOP is going to be vectorized; this means that it will iterate N/VF
800 times, whereas the loop IVs should be bumped N times.
802 Input:
803 - LOOP - a loop that is going to be vectorized. The last few iterations
804 of LOOP were peeled.
805 - NITERS - the number of iterations that LOOP executes (before it is
806 vectorized). i.e, the number of times the ivs should be bumped.
807 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
808 coming out from LOOP on which there are uses of the LOOP ivs
809 (this is the path from LOOP->exit to epilog_loop->preheader).
811 The new definitions of the ivs are placed in LOOP->exit.
812 The phi args associated with the edge UPDATE_E in the bb
813 UPDATE_E->dest are updated accordingly.
815 Assumption 1: Like the rest of the vectorizer, this function assumes
816 a single loop exit that has a single predecessor.
818 Assumption 2: The phi nodes in the LOOP header and in update_bb are
819 organized in the same order.
821 Assumption 3: The access function of the ivs is simple enough (see
822 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
824 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
825 coming out of LOOP on which the ivs of LOOP are used (this is the path
826 that leads to the epilog loop; other paths skip the epilog loop). This
827 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
828 needs to have its phis updated. */
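/* Editorial worked example (not from the original sources): for an IV with
   initial value INIT and loop-invariant step STEP, its value after NITERS
   scalar iterations is NI = INIT + NITERS * STEP; that is the expression
   materialized in the exit bb below and used as the PHI argument on
   UPDATE_E.  */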
831 static void
832 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
833 tree niters, edge update_e)
835 gphi_iterator gsi, gsi1;
836 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
837 basic_block update_bb = update_e->dest;
838 basic_block exit_bb = single_exit (loop)->dest;
840 /* Make sure there exists a single-predecessor exit bb: */
841 gcc_assert (single_pred_p (exit_bb));
842 gcc_assert (single_succ_edge (exit_bb) == update_e);
844 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
845 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
846 gsi_next (&gsi), gsi_next (&gsi1))
848 tree init_expr;
849 tree step_expr, off;
850 tree type;
851 tree var, ni, ni_name;
852 gimple_stmt_iterator last_gsi;
854 gphi *phi = gsi.phi ();
855 gphi *phi1 = gsi1.phi ();
856 if (dump_enabled_p ())
858 dump_printf_loc (MSG_NOTE, vect_location,
859 "vect_update_ivs_after_vectorizer: phi: ");
860 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
863 /* Skip reduction and virtual phis. */
864 if (!iv_phi_p (phi))
866 if (dump_enabled_p ())
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "reduc or virtual phi. skip.\n");
869 continue;
872 type = TREE_TYPE (gimple_phi_result (phi));
873 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
874 step_expr = unshare_expr (step_expr);
876 /* FORNOW: We do not support IVs whose evolution function is a polynomial
877 of degree >= 2 or exponential. */
878 gcc_assert (!tree_is_chrec (step_expr));
880 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
882 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
883 fold_convert (TREE_TYPE (step_expr), niters),
884 step_expr);
885 if (POINTER_TYPE_P (type))
886 ni = fold_build_pointer_plus (init_expr, off);
887 else
888 ni = fold_build2 (PLUS_EXPR, type,
889 init_expr, fold_convert (type, off));
891 var = create_tmp_var (type, "tmp");
893 last_gsi = gsi_last_bb (exit_bb);
894 gimple_seq new_stmts = NULL;
895 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
896 /* Exit_bb shouldn't be empty. */
897 if (!gsi_end_p (last_gsi))
898 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
899 else
900 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
902 /* Fix phi expressions in the successor bb. */
903 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
907 /* Function vect_gen_prolog_loop_niters
909 Generate the number of iterations which should be peeled as prolog for the
910 loop represented by LOOP_VINFO. It is calculated as the misalignment of
911 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
912 As a result, after the execution of this loop, the data reference DR will
913 refer to an aligned location. The following computation is generated:
915 If the misalignment of DR is known at compile time:
916 addr_mis = DR_MISALIGNMENT (dr);
917 Else, compute address misalignment in bytes:
918 addr_mis = addr & (vectype_align - 1)
920 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
922 (elem_size = element type size; an element is the scalar element whose type
923 is the inner type of the vectype)
925 The computations will be emitted at the end of BB. We also compute and
926 store upper bound (included) of the result in BOUND.
928 When the step of the data-ref in the loop is not 1 (as in interleaved data
929 and SLP), the number of iterations of the prolog must be divided by the step
930 (which is equal to the size of interleaved group).
932 The above formulas assume that VF == number of elements in the vector. This
933 may not hold when there are multiple-types in the loop.
934 In this case, for some data-references in the loop the VF does not represent
935 the number of elements that fit in the vector. Therefore, instead of VF we
936 use TYPE_VECTOR_SUBPARTS. */
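/* Editorial worked example (numbers are illustrative only): for a V4SI
   vector type (nelements = 4, vectype_align = 16, elem_size = 4) and a start
   address with byte_misalign = 8, elem_misalign = 8 >> 2 = 2 and
   iters = (4 - 2) & 3 = 2, i.e. two scalar iterations are peeled so that the
   data reference becomes 16-byte aligned.  */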
938 static tree
939 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
940 basic_block bb, int *bound)
942 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
943 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
944 tree var;
945 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
946 gimple_seq stmts = NULL, new_stmts = NULL;
947 tree iters, iters_name;
948 gimple *dr_stmt = DR_STMT (dr);
949 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
951 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
952 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
954 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
956 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
958 if (dump_enabled_p ())
959 dump_printf_loc (MSG_NOTE, vect_location,
960 "known peeling = %d.\n", npeel);
962 iters = build_int_cst (niters_type, npeel);
963 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
965 else
967 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
968 tree offset = negative
969 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
970 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
971 &stmts, offset, loop);
972 tree type = unsigned_type_for (TREE_TYPE (start_addr));
973 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
974 HOST_WIDE_INT elem_size =
975 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
976 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
977 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
978 tree nelements_tree = build_int_cst (type, nelements);
979 tree byte_misalign;
980 tree elem_misalign;
982 /* Create: byte_misalign = addr & (vectype_align - 1) */
983 byte_misalign =
984 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
985 vectype_align_minus_1);
987 /* Create: elem_misalign = byte_misalign / element_size */
988 elem_misalign =
989 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
991 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
992 if (negative)
993 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
994 else
995 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
996 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
997 iters = fold_convert (niters_type, iters);
998 *bound = nelements - 1;
1001 if (dump_enabled_p ())
1003 dump_printf_loc (MSG_NOTE, vect_location,
1004 "niters for prolog loop: ");
1005 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1006 dump_printf (MSG_NOTE, "\n");
1009 var = create_tmp_var (niters_type, "prolog_loop_niters");
1010 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1012 if (new_stmts)
1013 gimple_seq_add_seq (&stmts, new_stmts);
1014 if (stmts)
1016 gcc_assert (single_succ_p (bb));
1017 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1018 if (gsi_end_p (gsi))
1019 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1020 else
1021 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1023 return iters_name;
1027 /* Function vect_update_init_of_dr
1029 NITERS iterations were peeled from LOOP. DR represents a data reference
1030 in LOOP. This function updates the information recorded in DR to
1031 account for the fact that the first NITERS iterations had already been
1032 executed. Specifically, it updates the OFFSET field of DR. */
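/* Editorial example (not from the original sources): for an int access with
   DR_STEP = 4, peeling NITERS iterations adds 4 * NITERS to DR_OFFSET, so
   subsequent address computations start just past the peeled iterations.  */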
1034 static void
1035 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1037 tree offset = DR_OFFSET (dr);
1039 niters = fold_build2 (MULT_EXPR, sizetype,
1040 fold_convert (sizetype, niters),
1041 fold_convert (sizetype, DR_STEP (dr)));
1042 offset = fold_build2 (PLUS_EXPR, sizetype,
1043 fold_convert (sizetype, offset), niters);
1044 DR_OFFSET (dr) = offset;
1048 /* Function vect_update_inits_of_drs
1050 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1051 This function updates the information recorded for the data references in
1052 the loop to account for the fact that the first NITERS iterations had
1053 already been executed. Specifically, it updates the initial_condition of
1054 the access_function of all the data_references in the loop. */
1056 static void
1057 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1059 unsigned int i;
1060 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061 struct data_reference *dr;
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "=== vect_update_inits_of_dr ===\n");
1067 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1068 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1070 gimple_seq seq;
1071 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1072 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1074 niters = fold_convert (sizetype, niters);
1075 niters = force_gimple_operand (niters, &seq, false, var);
1076 if (seq)
1078 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1079 gcc_assert (!new_bb);
1083 FOR_EACH_VEC_ELT (datarefs, i, dr)
1084 vect_update_init_of_dr (dr, niters);
1088 /* This function builds ni_name = number of iterations. Statements
1089 are emitted on the loop preheader edge. */
1091 tree
1092 vect_build_loop_niters (loop_vec_info loop_vinfo)
1094 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1095 if (TREE_CODE (ni) == INTEGER_CST)
1096 return ni;
1097 else
1099 tree ni_name, var;
1100 gimple_seq stmts = NULL;
1101 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1103 var = create_tmp_var (TREE_TYPE (ni), "niters");
1104 ni_name = force_gimple_operand (ni, &stmts, false, var);
1105 if (stmts)
1106 gsi_insert_seq_on_edge_immediate (pe, stmts);
1108 return ni_name;
1112 /* Calculate the number of iterations above which vectorized loop will be
1113 preferred than scalar loop. NITERS_PROLOG is the number of iterations
1114 of prolog loop. If it's integer const, the integer number is also passed
1115 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1116 number of iterations of prolog loop. VFM1 is vector factor minus one.
1117 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1118 (rather than vectorized) loop will be executed. This function stores
1119 upper bound (included) of the result in BOUND_SCALAR. */
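/* Editorial worked example (illustrative values): with VFM1 = 3 and
   BOUND_PROLOG = 3 the result is niters_prolog + 3 and *BOUND_SCALAR = 6;
   if CHECK_PROFITABILITY holds, TH = 10 and niters_prolog is not constant,
   the decremented threshold 9 exceeds 6, so the constant 9 is returned and
   recorded in *BOUND_SCALAR instead.  */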
1121 static tree
1122 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1123 int bound_prolog, int vfm1, int th,
1124 int *bound_scalar, bool check_profitability)
1126 tree type = TREE_TYPE (niters_prolog);
1127 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1128 build_int_cst (type, vfm1));
1130 *bound_scalar = vfm1 + bound_prolog;
1131 if (check_profitability)
1133 /* TH indicates the minimum niters of vectorized loop, while we
1134 compute the maximum niters of scalar loop. */
1135 th--;
1136 /* Peeling for constant times. */
1137 if (int_niters_prolog >= 0)
1139 *bound_scalar = (int_niters_prolog + vfm1 < th
1140 ? th
1141 : vfm1 + int_niters_prolog);
1142 return build_int_cst (type, *bound_scalar);
1144 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1145 bound (included) of niters of the prolog loop. */
1146 if (th >= vfm1 + bound_prolog)
1148 *bound_scalar = th;
1149 return build_int_cst (type, th);
1151 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1152 else if (th > vfm1)
1153 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1155 return niters;
1158 /* This function generates the following statements:
1160 niters = number of iterations loop executes (after peeling)
1161 niters_vector = niters / vf
1163 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1164 true if NITERS doesn't overflow. */
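/* Editorial worked example (illustrative values): with vf = 4 and
   niters = 17, the no-overflow form computes 17 >> 2 = 4, and the
   overflow-safe form computes ((17 - 4) >> 2) + 1 = 4 as well, relying on
   the vector loop executing at least once.  */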
1166 void
1167 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1168 tree *niters_vector_ptr, bool niters_no_overflow)
1170 tree ni_minus_gap, var;
1171 tree niters_vector;
1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1174 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1176 /* If epilogue loop is required because of data accesses with gaps, we
1177 subtract one iteration from the total number of iterations here for
1178 correct calculation of RATIO. */
1179 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1181 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1182 niters,
1183 build_one_cst (TREE_TYPE (niters)));
1184 if (!is_gimple_val (ni_minus_gap))
1186 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1187 gimple *stmts = NULL;
1188 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1189 true, var);
1190 gsi_insert_seq_on_edge_immediate (pe, stmts);
1193 else
1194 ni_minus_gap = niters;
1196 /* Create: niters >> log2(vf) */
1197 /* If it's known that niters == number of latch executions + 1 doesn't
1198 overflow, we can generate niters >> log2(vf); otherwise we generate
1199 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1200 will be at least one. */
1201 if (niters_no_overflow)
1202 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1203 ni_minus_gap, log_vf);
1204 else
1205 niters_vector
1206 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1207 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1209 ni_minus_gap,
1210 build_int_cst
1211 (TREE_TYPE (niters), vf)),
1212 log_vf),
1213 build_int_cst (TREE_TYPE (niters), 1));
1215 if (!is_gimple_val (niters_vector))
1217 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1218 gimple *stmts = NULL;
1219 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1220 gsi_insert_seq_on_edge_immediate (pe, stmts);
1222 *niters_vector_ptr = niters_vector;
1224 return;
1227 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1228 loop specified by LOOP_VINFO after vectorization, compute the number
1229 of iterations before vectorization (niters_vector * vf) and store it
1230 to NITERS_VECTOR_MULT_VF_PTR. */
1232 static void
1233 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1234 tree niters_vector,
1235 tree *niters_vector_mult_vf_ptr)
1237 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1238 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 tree type = TREE_TYPE (niters_vector);
1240 tree log_vf = build_int_cst (type, exact_log2 (vf));
1241 basic_block exit_bb = single_exit (loop)->dest;
1243 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1244 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1245 niters_vector, log_vf);
1246 if (!is_gimple_val (niters_vector_mult_vf))
1248 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1249 gimple_seq stmts = NULL;
1250 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1251 &stmts, true, var);
1252 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1253 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1255 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1258 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1259 from SECOND/FIRST and puts it at the original loop's preheader/exit
1260 edge; the two loops are arranged as below:
1262 preheader_a:
1263 first_loop:
1264 header_a:
1265 i_1 = PHI<i_0, i_2>;
1267 i_2 = i_1 + 1;
1268 if (cond_a)
1269 goto latch_a;
1270 else
1271 goto between_bb;
1272 latch_a:
1273 goto header_a;
1275 between_bb:
1276 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1278 second_loop:
1279 header_b:
1280 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1281 or with i_2 if no LCSSA phi is created
1282 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1284 i_4 = i_3 + 1;
1285 if (cond_b)
1286 goto latch_b;
1287 else
1288 goto exit_bb;
1289 latch_b:
1290 goto header_b;
1292 exit_bb:
1294 This function creates loop closed SSA for the first loop; it updates the
1295 second loop's PHI nodes by replacing the argument on the incoming edge
1296 with the result of the newly created lcssa PHI nodes. If
1297 CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
1298 created for non-iv phis of the first loop.
1300 This function assumes the exit bb of the first loop is the preheader bb
1301 of the second loop, i.e., between_bb in the example code. With PHIs
1302 updated, the second loop will execute the remaining iterations of the
first. */
1304 static void
1305 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1306 struct loop *first, struct loop *second,
1307 bool create_lcssa_for_iv_phis)
1309 gphi_iterator gsi_update, gsi_orig;
1310 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1312 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1313 edge second_preheader_e = loop_preheader_edge (second);
1314 basic_block between_bb = single_exit (first)->dest;
1316 gcc_assert (between_bb == second_preheader_e->src);
1317 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1318 /* Either the first loop or the second is the loop to be vectorized. */
1319 gcc_assert (loop == first || loop == second);
1321 for (gsi_orig = gsi_start_phis (first->header),
1322 gsi_update = gsi_start_phis (second->header);
1323 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1324 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1326 gphi *orig_phi = gsi_orig.phi ();
1327 gphi *update_phi = gsi_update.phi ();
1329 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1330 /* Generate lcssa PHI node for the first loop. */
1331 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1332 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1334 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1335 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1336 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1337 arg = new_res;
1340 /* Update PHI node in the second loop by replacing arg on the loop's
1341 incoming edge. */
1342 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1346 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1347 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1348 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1349 appear like below:
1351 guard_bb:
1352 if (cond)
1353 goto merge_bb;
1354 else
1355 goto skip_loop;
1357 skip_loop:
1358 header_a:
1359 i_1 = PHI<i_0, i_2>;
1361 i_2 = i_1 + 1;
1362 if (cond_a)
1363 goto latch_a;
1364 else
1365 goto exit_a;
1366 latch_a:
1367 goto header_a;
1369 exit_a:
1370 i_5 = PHI<i_2>;
1372 merge_bb:
1373 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1375 update_loop:
1376 header_b:
1377 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1379 i_4 = i_3 + 1;
1380 if (cond_b)
1381 goto latch_b;
1382 else
1383 goto exit_bb;
1384 latch_b:
1385 goto header_b;
1387 exit_bb:
1389 This function creates PHI nodes at merge_bb and replaces the use of i_5
1390 in the update_loop's PHI node with the new PHI result. */
1392 static void
1393 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1394 struct loop *update_loop,
1395 edge guard_edge, edge merge_edge)
1397 source_location merge_loc, guard_loc;
1398 edge orig_e = loop_preheader_edge (skip_loop);
1399 edge update_e = loop_preheader_edge (update_loop);
1400 gphi_iterator gsi_orig, gsi_update;
1402 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1403 gsi_update = gsi_start_phis (update_loop->header));
1404 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1405 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1407 gphi *orig_phi = gsi_orig.phi ();
1408 gphi *update_phi = gsi_update.phi ();
1410 /* Generate new phi node at merge bb of the guard. */
1411 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1412 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1414 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1415 args in NEW_PHI for these edges. */
1416 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1417 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1418 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1419 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1420 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1421 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1423 /* Update phi in UPDATE_PHI. */
1424 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1428 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1429 this function searches for the corresponding lcssa phi node in exit
1430 bb of LOOP. If it is found, return the phi result; otherwise return
1431 NULL. */
1433 static tree
1434 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1435 gphi *lcssa_phi)
1437 gphi_iterator gsi;
1438 edge e = single_exit (loop);
1440 gcc_assert (single_pred_p (e->dest));
1441 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1443 gphi *phi = gsi.phi ();
1444 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1445 PHI_ARG_DEF (lcssa_phi, 0), 0))
1446 return PHI_RESULT (phi);
1448 return NULL_TREE;
1451 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1452 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1453 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1454 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1455 The CFG looks like:
1457 loop:
1458 header_a:
1459 i_1 = PHI<i_0, i_2>;
1461 i_2 = i_1 + 1;
1462 if (cond_a)
1463 goto latch_a;
1464 else
1465 goto exit_a;
1466 latch_a:
1467 goto header_a;
1469 exit_a:
1471 guard_bb:
1472 if (cond)
1473 goto merge_bb;
1474 else
1475 goto epilog_loop;
1477 ;; fall_through_bb
1479 epilog_loop:
1480 header_b:
1481 i_3 = PHI<i_2, i_4>;
1483 i_4 = i_3 + 1;
1484 if (cond_b)
1485 goto latch_b;
1486 else
1487 goto merge_bb;
1488 latch_b:
1489 goto header_b;
1491 merge_bb:
1492 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1494 exit_bb:
1495 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1497 For each name used outside EPILOG (i.e. for each name that has a lcssa
1498 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1499 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1500 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1501 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1502 in exit_bb will also be updated. */
1504 static void
1505 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1506 edge guard_edge, edge merge_edge)
1508 gphi_iterator gsi;
1509 basic_block merge_bb = guard_edge->dest;
1511 gcc_assert (single_succ_p (merge_bb));
1512 edge e = single_succ_edge (merge_bb);
1513 basic_block exit_bb = e->dest;
1514 gcc_assert (single_pred_p (exit_bb));
1515 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1517 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1519 gphi *update_phi = gsi.phi ();
1520 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1521 /* This loop-closed-phi actually doesn't represent a use out of the
1522 loop - the phi arg is a constant. */
1523 if (TREE_CODE (old_arg) != SSA_NAME)
1524 continue;
1526 tree merge_arg = get_current_def (old_arg);
1527 if (!merge_arg)
1528 merge_arg = old_arg;
1530 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1531 /* If the var is live after loop but not a reduction, we simply
1532 use the old arg. */
1533 if (!guard_arg)
1534 guard_arg = old_arg;
1536 /* Create new phi node in MERGE_BB: */
1537 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1538 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1540 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
1541 the two PHI args in merge_phi for these edges. */
1542 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1543 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1545 /* Update the original phi in exit_bb. */
1546 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1550 /* EPILOG loop is duplicated from the original loop for vectorizing,
1551 the arg of its loop closed ssa PHI needs to be updated. */
1553 static void
1554 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1556 gphi_iterator gsi;
1557 basic_block exit_bb = single_exit (epilog)->dest;
1559 gcc_assert (single_pred_p (exit_bb));
1560 edge e = EDGE_PRED (exit_bb, 0);
1561 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1562 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1565 /* Function vect_do_peeling.
1567 Input:
1568 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1570 preheader:
1571 LOOP:
1572 header_bb:
1573 loop_body
1574 if (exit_loop_cond) goto exit_bb
1575 else goto header_bb
1576 exit_bb:
1578 - NITERS: The number of iterations of the loop.
1579 - NITERSM1: The number of iterations of the loop's latch.
1580 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1581 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1582 CHECK_PROFITABILITY is true.
1583 Output:
1584 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1586 This function peels prolog and epilog from the loop, adds guards skipping
1587 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1588 would look like:
1590 guard_bb_1:
1591 if (prefer_scalar_loop) goto merge_bb_1
1592 else goto guard_bb_2
1594 guard_bb_2:
1595 if (skip_prolog) goto merge_bb_2
1596 else goto prolog_preheader
1598 prolog_preheader:
1599 PROLOG:
1600 prolog_header_bb:
1601 prolog_body
1602 if (exit_prolog_cond) goto prolog_exit_bb
1603 else goto prolog_header_bb
1604 prolog_exit_bb:
1606 merge_bb_2:
1608 vector_preheader:
1609 VECTOR LOOP:
1610 vector_header_bb:
1611 vector_body
1612 if (exit_vector_cond) goto vector_exit_bb
1613 else goto vector_header_bb
1614 vector_exit_bb:
1616 guard_bb_3:
1617 if (skip_epilog) goto merge_bb_3
1618 else goto epilog_preheader
1620 merge_bb_1:
1622 epilog_preheader:
1623 EPILOG:
1624 epilog_header_bb:
1625 epilog_body
1626 if (exit_epilog_cond) goto merge_bb_3
1627 else goto epilog_header_bb
1629 merge_bb_3:
1631 Note this function peels the prolog and epilog, and adds the guards,
1632 only when necessary.
1633 Returns created epilogue or NULL.
1635 TODO: Guard for prefer_scalar_loop should be emitted along with
1636 versioning conditions if loop versioning is needed. */
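/* Editorial note (not part of the original sources): in the code below,
   guard_bb_1 corresponds to skip_vector (scalar loop preferred when the
   iteration count is unknown), guard_bb_2 to skip_prolog (the prolog turns
   out to need zero iterations at run time) and guard_bb_3 to skip_epilog
   (a run-time check whether any iterations are left for the epilogue).  */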
1639 struct loop *
1640 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1641 tree *niters_vector, int th, bool check_profitability,
1642 bool niters_no_overflow)
1644 edge e, guard_e;
1645 tree type = TREE_TYPE (niters), guard_cond;
1646 basic_block guard_bb, guard_to;
1647 int prob_prolog, prob_vector, prob_epilog;
1648 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1649 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1651 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1652 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1654 if (!prolog_peeling && !epilog_peeling)
1655 return NULL;
1657 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1658 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1659 vf = 3;
1660 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1661 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1663 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1664 struct loop *first_loop = loop;
1665 create_lcssa_for_virtual_phi (loop);
1666 update_ssa (TODO_update_ssa_only_virtuals);
1668 if (MAY_HAVE_DEBUG_STMTS)
1670 gcc_assert (!adjust_vec.exists ());
1671 adjust_vec.create (32);
1673 initialize_original_copy_tables ();
1675 /* Prolog loop may be skipped. */
1676 bool skip_prolog = (prolog_peeling != 0);
1677 /* Skip to epilog if scalar loop may be preferred. It's only used when
1678 we peel for epilog loop. */
1679 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1680 /* Epilog loop must be executed if the number of iterations for epilog
1681 loop is known at compile time, otherwise we need to add a check at
1682 the end of vector loop and skip to the end of epilog loop. */
1683 bool skip_epilog = (prolog_peeling < 0
1684 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1686 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1687 skip_epilog = false;
1689 /* Record the anchor bb at which guard should be placed if scalar loop
1690 may be preferred. */
1691 basic_block anchor = loop_preheader_edge (loop)->src;
1692 if (skip_vector)
1694 split_edge (loop_preheader_edge (loop));
1696 /* Due to the order in which we peel prolog and epilog, we first
1697 propagate probability to the whole loop. The purpose is to
1698 avoid adjusting probabilities of both prolog and vector loops
1699 separately. Note in this case, the probability of epilog loop
1700 needs to be scaled back later. */
1701 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1702 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1703 REG_BR_PROB_BASE);
1704 scale_loop_profile (loop, prob_vector, bound);
1707 tree niters_prolog = build_int_cst (type, 0);
1708 source_location loop_loc = find_loop_location (loop);
1709 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1710 if (prolog_peeling)
1712 e = loop_preheader_edge (loop);
1713 if (!slpeel_can_duplicate_loop_p (loop, e))
1715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1716 "loop can't be duplicated to preheader edge.\n");
1717 gcc_unreachable ();
1719 /* Peel prolog and put it on preheader edge of loop. */
1720 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1721 if (!prolog)
1723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1724 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1725 gcc_unreachable ();
1727 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1728 first_loop = prolog;
1729 reset_original_copy_tables ();
1731 /* Generate and update the number of iterations for prolog loop. */
1732 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1733 &bound_prolog);
1734 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1736 /* Skip the prolog loop. */
1737 if (skip_prolog)
1739 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1740 niters_prolog, build_int_cst (type, 0));
1741 guard_bb = loop_preheader_edge (prolog)->src;
1742 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1743 guard_to = split_edge (loop_preheader_edge (loop));
1744 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1745 guard_to, guard_bb,
1746 inverse_probability (prob_prolog));
1747 e = EDGE_PRED (guard_to, 0);
1748 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1749 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1751 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1752 REG_BR_PROB_BASE);
1753 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1755 /* Update init address of DRs. */
1756 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1757 /* Update niters for vector loop. */
1758 LOOP_VINFO_NITERS (loop_vinfo)
1759 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1760 LOOP_VINFO_NITERSM1 (loop_vinfo)
1761 = fold_build2 (MINUS_EXPR, type,
1762 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1763 niters = vect_build_loop_niters (loop_vinfo);
1765 /* Prolog iterates at most bound_prolog times, latch iterates at
1766 most bound_prolog - 1 times. */
1767 record_niter_bound (prolog, bound_prolog - 1, false, true);
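/* For instance (illustrative values): if bound_prolog is 3, say because at
   most VF - 1 iterations are peeled with VF == 4, the recorded latch bound
   is 2.  */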
1768 delete_update_ssa ();
1769 adjust_vec_debug_stmts ();
1770 scev_reset ();
1773 if (epilog_peeling)
1775 e = single_exit (loop);
1776 if (!slpeel_can_duplicate_loop_p (loop, e))
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1779 "loop can't be duplicated to exit edge.\n");
1780 gcc_unreachable ();
1782 /* Peel epilog and put it on exit edge of loop. */
1783 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1784 if (!epilog)
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1787 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1788 gcc_unreachable ();
1790 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1792 /* The scalar version of the loop may be preferred. In this case, add
1793 a guard and skip to the epilog. Note this only happens when the
1794 number of iterations of the loop is unknown at compile time,
1795 otherwise this won't be vectorized. */
1796 if (skip_vector)
1798 /* An additional epilogue iteration is peeled if a gap exists. */
1799 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1800 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1801 bound_prolog,
1802 peel_for_gaps ? vf : vf - 1,
1803 th, &bound_scalar,
1804 check_profitability);
1805 /* Build guard against NITERSM1 since NITERS may overflow. */
1806 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1807 guard_bb = anchor;
1808 guard_to = split_edge (loop_preheader_edge (epilog));
1809 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1810 guard_to, guard_bb,
1811 inverse_probability (prob_vector));
1812 e = EDGE_PRED (guard_to, 0);
1813 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1814 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1816 /* Simply propagate profile info from guard_bb to guard_to which is
1817 a merge point of control flow. */
1818 guard_to->frequency = guard_bb->frequency;
1819 guard_to->count = guard_bb->count;
1820 single_succ_edge (guard_to)->count = guard_to->count;
1821 /* Scale probability of epilog loop back. */
1822 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1823 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
1826 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1827 tree niters_vector_mult_vf;
1828 /* If the loop is peeled a non-zero constant number of times, niters now
1829 refers to orig_niters - prolog_peeling; it won't overflow even if
1830 orig_niters overflows. */
1831 niters_no_overflow |= (prolog_peeling > 0);
1832 vect_gen_vector_loop_niters (loop_vinfo, niters,
1833 niters_vector, niters_no_overflow);
1834 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1835 &niters_vector_mult_vf);
1836 /* Update IVs of original loop as if they were advanced by
1837 niters_vector_mult_vf steps. */
1838 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1839 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1840 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1841 update_e);
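/* A minimal sketch of the effect (hypothetical IV, not from the source):
   for an induction variable i = {0, +, 1}, the value made available on
   UPDATE_E becomes niters_vector_mult_vf, so the epilog resumes counting
   right after the iterations the vector loop has already executed.  */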
1843 if (skip_epilog)
1845 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1846 niters, niters_vector_mult_vf);
1847 guard_bb = single_exit (loop)->dest;
1848 guard_to = split_edge (single_exit (epilog));
1849 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1850 skip_vector ? anchor : guard_bb,
1851 inverse_probability (prob_epilog));
1852 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1853 single_exit (epilog));
1854 /* Only need to handle basic block before epilog loop if it's not
1855 the guard_bb, which is the case when skip_vector is true. */
1856 if (guard_bb != bb_before_epilog)
1858 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1859 + inverse_probability (prob_vector));
1861 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1862 REG_BR_PROB_BASE);
1864 scale_loop_profile (epilog, prob_epilog, bound);
1866 else
1867 slpeel_update_phi_nodes_for_lcssa (epilog);
1869 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1870 /* The epilog loop is shared with the scalar version of the loop. */
1871 bound = MAX (bound, bound_scalar - 1);
1872 record_niter_bound (epilog, bound, false, true);
1874 delete_update_ssa ();
1875 adjust_vec_debug_stmts ();
1876 scev_reset ();
1878 adjust_vec.release ();
1879 free_original_copy_tables ();
1881 return epilog;
1884 /* Function vect_create_cond_for_niters_checks.
1886 Create a conditional expression that represents the run-time checks for
1887 the loop's niter. The loop is guaranteed to terminate if the run-time
1888 checks hold.
1890 Input:
1891 COND_EXPR - input conditional expression. New conditions will be chained
1892 with logical AND operation. If it is NULL, the niter
1893 assumptions become the sole condition.
1894 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1895 assumptions to be checked.
1897 Output:
1898 COND_EXPR - conditional expression.
1900 The returned COND_EXPR is the conditional expression to be used in the
1901 if statement that controls which version of the loop gets executed at
1902 runtime. */
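/* An illustrative sketch (assumed caller state, not from the source): if
   *COND_EXPR already holds a profitability check such as niters > th, the
   chained result is roughly

     niters > th && <LOOP_VINFO_NITERS_ASSUMPTIONS>  */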
1904 static void
1905 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1907 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1909 if (*cond_expr)
1910 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1911 *cond_expr, part_cond_expr);
1912 else
1913 *cond_expr = part_cond_expr;
1916 /* Function vect_create_cond_for_align_checks.
1918 Create a conditional expression that represents the alignment checks for
1919 all of the data references (array element references) whose alignment
1920 must be checked at runtime.
1922 Input:
1923 COND_EXPR - input conditional expression. New conditions will be chained
1924 with logical AND operation.
1925 LOOP_VINFO - two fields of the loop information are used.
1926 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1927 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1929 Output:
1930 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1931 expression.
1932 The returned value is the conditional expression to be used in the if
1933 statement that controls which version of the loop gets executed at runtime.
1935 The algorithm makes two assumptions:
1936 1) The number of bytes "n" in a vector is a power of 2.
1937 2) An address "a" is aligned if a%n is zero, so the
1938 test can be done as a&(n-1) == 0. For example, for 16
1939 byte vectors the test is a&0xf == 0. */
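/* An illustrative sketch of the generated sequence (hypothetical refs a and
   b with 16-byte vectors, so mask == 0xf; not from the source):

     addr2int0 = (ptr-sized int) &a[i0];
     addr2int1 = (ptr-sized int) &b[j0];
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;
     cond      = andmask == 0;   // true iff both addresses are 16-byte aligned
 */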
1941 static void
1942 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1943 tree *cond_expr,
1944 gimple_seq *cond_expr_stmt_list)
1946 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1947 vec<gimple *> may_misalign_stmts
1948 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1949 gimple *ref_stmt;
1950 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1951 tree mask_cst;
1952 unsigned int i;
1953 tree int_ptrsize_type;
1954 char tmp_name[20];
1955 tree or_tmp_name = NULL_TREE;
1956 tree and_tmp_name;
1957 gimple *and_stmt;
1958 tree ptrsize_zero;
1959 tree part_cond_expr;
1961 /* Check that mask is one less than a power of 2, i.e., mask is
1962 all zeros followed by all ones. */
1963 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1965 int_ptrsize_type = signed_type_for (ptr_type_node);
1967 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1968 of the first vector of the i'th data reference. */
1970 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1972 gimple_seq new_stmt_list = NULL;
1973 tree addr_base;
1974 tree addr_tmp_name;
1975 tree new_or_tmp_name;
1976 gimple *addr_stmt, *or_stmt;
1977 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1978 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1979 bool negative = tree_int_cst_compare
1980 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1981 tree offset = negative
1982 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1984 /* create: addr_tmp = (int)(address_of_first_vector) */
1985 addr_base =
1986 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1987 offset, loop);
1988 if (new_stmt_list != NULL)
1989 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1991 sprintf (tmp_name, "addr2int%d", i);
1992 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1993 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1994 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1996 /* The addresses are ORed together. */
1998 if (or_tmp_name != NULL_TREE)
2000 /* create: or_tmp = or_tmp | addr_tmp */
2001 sprintf (tmp_name, "orptrs%d", i);
2002 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2003 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2004 or_tmp_name, addr_tmp_name);
2005 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2006 or_tmp_name = new_or_tmp_name;
2008 else
2009 or_tmp_name = addr_tmp_name;
2011 } /* end for i */
2013 mask_cst = build_int_cst (int_ptrsize_type, mask);
2015 /* create: and_tmp = or_tmp & mask */
2016 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2018 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2019 or_tmp_name, mask_cst);
2020 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2022 /* Make and_tmp the left operand of the conditional test against zero.
2023 If and_tmp has a nonzero bit then some address is unaligned. */
2024 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2025 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2026 and_tmp_name, ptrsize_zero);
2027 if (*cond_expr)
2028 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2029 *cond_expr, part_cond_expr);
2030 else
2031 *cond_expr = part_cond_expr;
2034 /* Given two data references and segment lengths described by DR_A and DR_B,
2035 create an expression checking if the two address ranges intersect with
2036 each other based on the index of the two addresses. This can only be done
2037 if DR_A and DR_B refer to the same (array) object and the index is
2038 the only difference. For example:
2040 DR_A DR_B
2041 data-ref arr[i] arr[j]
2042 base_object arr arr
2043 index {i_0, +, 1}_loop {j_0, +, 1}_loop
2045 The addresses and their index are like:
2047 |<- ADDR_A ->| |<- ADDR_B ->|
2048 ------------------------------------------------------->
2049 | | | | | | | | | |
2050 ------------------------------------------------------->
2051 i_0 ... i_0+4 j_0 ... j_0+4
2053 We can create an expression based on the index rather than the address:
2055 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2057 Note the evolution step of the index needs to be considered in the comparison. */
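/* A sketch of the condition built below (illustrative form): with
   idx_len = niter_len * idx_step and min1/min2 the CHREC_LEFT of the two
   access functions, the check is

     min1 + idx_len1 <= min2 || min2 + idx_len2 <= min1

   with the ranges adjusted when the step is negative.  */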
2059 static bool
2060 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2061 const dr_with_seg_len& dr_a,
2062 const dr_with_seg_len& dr_b)
2064 if (integer_zerop (DR_STEP (dr_a.dr))
2065 || integer_zerop (DR_STEP (dr_b.dr))
2066 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2067 return false;
2069 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2070 return false;
2072 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2073 return false;
2075 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2076 return false;
2078 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2079 return false;
2081 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2083 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2084 unsigned HOST_WIDE_INT abs_step
2085 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2087 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2088 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2089 /* Infer the number of iterations with which the memory segment is accessed
2090 by DR. In other words, aliasing is checked if the memory segment accessed
2091 by DR_A in some iterations intersects with the memory segment accessed by
2092 DR_B in the same number of iterations.
2093 Note the segment length is a linear function of the number of iterations
2094 with DR_STEP as the coefficient. */
2095 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2096 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
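/* For example (illustrative numbers): a segment length of 64 bytes with
   abs_step == 16 gives niter_len == (64 + 15) / 16 == 4 iterations.  */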
2098 unsigned int i;
2099 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2100 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2102 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2103 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2104 /* The two indices must be the same if they are not SCEVs, or are not
2105 SCEVs with respect to the current loop being vectorized. */
2106 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2107 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2108 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2109 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2111 if (operand_equal_p (access1, access2, 0))
2112 continue;
2114 return false;
2116 /* The two indices must have the same step. */
2117 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2118 return false;
2120 tree idx_step = CHREC_RIGHT (access1);
2121 /* The index must have a constant step, otherwise DR_STEP wouldn't be constant. */
2122 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2123 /* The index must evaluate in the same direction as DR. */
2124 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2126 tree min1 = CHREC_LEFT (access1);
2127 tree min2 = CHREC_LEFT (access2);
2128 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2129 return false;
2131 /* Ideally, aliasing could be checked against the loop's control IV, but
2132 we would need to prove a linear mapping between the control IV and the
2133 reference index. Although that should be true, we check against the
2134 (array) index of the data reference instead. Like the segment length,
2135 the index length is a linear function of the number of iterations with
2136 idx_step as the coefficient, i.e., niter_len * idx_step. */
2137 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2138 build_int_cst (TREE_TYPE (min1),
2139 niter_len1));
2140 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2141 build_int_cst (TREE_TYPE (min2),
2142 niter_len2));
2143 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2144 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2145 /* Adjust ranges for negative step. */
2146 if (neg_step)
2148 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2149 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2150 CHREC_LEFT (access1), idx_step);
2151 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2152 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2153 CHREC_LEFT (access2), idx_step);
2155 tree part_cond_expr
2156 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2157 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2158 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2159 if (*cond_expr)
2160 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2161 *cond_expr, part_cond_expr);
2162 else
2163 *cond_expr = part_cond_expr;
2165 return true;
2168 /* Given two data references and segment lengths described by DR_A and DR_B,
2169 create an expression checking if the two address ranges intersect with
2170 each other:
2172 ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2173 || ((DR_B_addr_0 + DR_B_segment_length_0) <= DR_A_addr_0) */
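/* For example (an assumed instance, not from the source), with two 16-byte
   segments the emitted check is roughly

     addr_a + 16 <= addr_b || addr_b + 16 <= addr_a

   i.e. the two segments do not overlap.  */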
2175 static void
2176 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2177 const dr_with_seg_len& dr_a,
2178 const dr_with_seg_len& dr_b)
2180 *cond_expr = NULL_TREE;
2181 if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2182 return;
2184 tree segment_length_a = dr_a.seg_len;
2185 tree segment_length_b = dr_b.seg_len;
2186 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2187 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2188 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2190 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2191 offset_a, DR_INIT (dr_a.dr));
2192 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2193 offset_b, DR_INIT (dr_b.dr));
2194 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2195 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2197 tree seg_a_min = addr_base_a;
2198 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2199 /* For a negative step, we need to adjust the address range by TYPE_SIZE_UNIT
2200 bytes, e.g., for int accesses from a[3] down to a[1] the range is
2201 [a+4, a+16) instead of [a, a+12). */
2202 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2204 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2205 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2206 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2209 tree seg_b_min = addr_base_b;
2210 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2211 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2213 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2214 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2215 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2217 *cond_expr
2218 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2219 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2220 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2223 /* Function vect_create_cond_for_alias_checks.
2225 Create a conditional expression that represents the run-time checks for
2226 overlapping of address ranges represented by a list of data references
2227 relations passed as input.
2229 Input:
2230 COND_EXPR - input conditional expression. New conditions will be chained
2231 with logical AND operation. If it is NULL, then the function
2232 is used to return the number of alias checks.
2233 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2234 to be checked.
2236 Output:
2237 COND_EXPR - conditional expression.
2239 The returned COND_EXPR is the conditional expression to be used in the if
2240 statement that controls which version of the loop gets executed at runtime.
2243 void
2244 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2246 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2247 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2248 tree part_cond_expr;
2250 if (comp_alias_ddrs.is_empty ())
2251 return;
2253 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2255 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2256 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2258 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_NOTE, vect_location,
2261 "create runtime check for data references ");
2262 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2263 dump_printf (MSG_NOTE, " and ");
2264 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2265 dump_printf (MSG_NOTE, "\n");
2268 /* Create a condition expression for each pair of data references. */
2269 create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2270 if (*cond_expr)
2271 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2272 *cond_expr, part_cond_expr);
2273 else
2274 *cond_expr = part_cond_expr;
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_NOTE, vect_location,
2279 "created %u versioning for alias checks.\n",
2280 comp_alias_ddrs.length ());
2284 /* Function vect_loop_versioning.
2286 If the loop has data references that may or may not be aligned and/or
2287 has data reference relations whose independence was not proven, then
2288 two versions of the loop need to be generated, one which is vectorized
2289 and one which isn't. A test is then generated to control which of the
2290 loops is executed. The test checks for the alignment of all of the
2291 data references that may or may not be aligned. An additional
2292 sequence of runtime tests is generated for each pair of DDRs whose
2293 independence was not proven. The vectorized version of the loop is
2294 executed only if both the alias and alignment tests pass.
2296 The test generated to check which version of loop is executed
2297 is modified to also check for profitability as indicated by the
2298 cost model threshold TH.
2300 The versioning precondition(s) are placed in *COND_EXPR and
2301 *COND_EXPR_STMT_LIST. */
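/* A rough sketch of the combined precondition (illustrative; only the terms
   required for this loop are actually emitted):

     cond = niters > th              // profitability, if CHECK_PROFITABILITY
            && <niter assumptions>   // if versioning for niters
            && <alignment check>     // if versioning for alignment
            && <alias check>;        // if versioning for alias

   loop_version then splits the loop so that the vectorized copy runs only
   when this condition holds.  */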
2303 void
2304 vect_loop_versioning (loop_vec_info loop_vinfo,
2305 unsigned int th, bool check_profitability)
2307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2308 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2309 basic_block condition_bb;
2310 gphi_iterator gsi;
2311 gimple_stmt_iterator cond_exp_gsi;
2312 basic_block merge_bb;
2313 basic_block new_exit_bb;
2314 edge new_exit_e, e;
2315 gphi *orig_phi, *new_phi;
2316 tree cond_expr = NULL_TREE;
2317 gimple_seq cond_expr_stmt_list = NULL;
2318 tree arg;
2319 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2320 gimple_seq gimplify_stmt_list = NULL;
2321 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2322 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2323 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2324 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2326 if (check_profitability)
2327 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2328 build_int_cst (TREE_TYPE (scalar_loop_iters),
2329 th));
2331 if (version_niter)
2332 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2334 if (cond_expr)
2335 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2336 is_gimple_condexpr, NULL_TREE);
2338 if (version_align)
2339 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2340 &cond_expr_stmt_list);
2342 if (version_alias)
2343 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2345 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2346 is_gimple_condexpr, NULL_TREE);
2347 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2349 initialize_original_copy_tables ();
2350 if (scalar_loop)
2352 edge scalar_e;
2353 basic_block preheader, scalar_preheader;
2355 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2356 scale LOOP's frequencies instead. */
2357 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2358 prob, REG_BR_PROB_BASE - prob,
2359 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2360 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2361 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2362 while we need to move it above LOOP's preheader. */
2363 e = loop_preheader_edge (loop);
2364 scalar_e = loop_preheader_edge (scalar_loop);
2365 gcc_assert (empty_block_p (e->src)
2366 && single_pred_p (e->src));
2367 gcc_assert (empty_block_p (scalar_e->src)
2368 && single_pred_p (scalar_e->src));
2369 gcc_assert (single_pred_p (condition_bb));
2370 preheader = e->src;
2371 scalar_preheader = scalar_e->src;
2372 scalar_e = find_edge (condition_bb, scalar_preheader);
2373 e = single_pred_edge (preheader);
2374 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2375 scalar_preheader);
2376 redirect_edge_and_branch_force (scalar_e, preheader);
2377 redirect_edge_and_branch_force (e, condition_bb);
2378 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2379 single_pred (condition_bb));
2380 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2381 single_pred (scalar_preheader));
2382 set_immediate_dominator (CDI_DOMINATORS, preheader,
2383 condition_bb);
2385 else
2386 nloop = loop_version (loop, cond_expr, &condition_bb,
2387 prob, REG_BR_PROB_BASE - prob,
2388 prob, REG_BR_PROB_BASE - prob, true);
2390 if (version_niter)
2392 /* The versioned loop could be infinite; we need to clear the existing
2393 niter information, which was copied from the original loop. */
2394 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2395 vect_free_loop_info_assumptions (nloop);
2396 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2397 loop_constraint_set (loop, LOOP_C_INFINITE);
2400 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2401 && dump_enabled_p ())
2403 if (version_alias)
2404 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2405 "loop versioned for vectorization because of "
2406 "possible aliasing\n");
2407 if (version_align)
2408 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2409 "loop versioned for vectorization to enhance "
2410 "alignment\n");
2413 free_original_copy_tables ();
2415 /* Loop versioning violates an assumption we try to maintain during
2416 vectorization - that the loop exit block has a single predecessor.
2417 After versioning, the exit block of both loop versions is the same
2418 basic block (i.e. it has two predecessors). Just in order to simplify
2419 following transformations in the vectorizer, we fix this situation
2420 here by adding a new (empty) block on the exit-edge of the loop,
2421 with the proper loop-exit phis to maintain loop-closed-form.
2422 If loop versioning wasn't done from loop, but from scalar_loop instead,
2423 merge_bb will already have just a single predecessor. */
2425 merge_bb = single_exit (loop)->dest;
2426 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2428 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2429 new_exit_bb = split_edge (single_exit (loop));
2430 new_exit_e = single_exit (loop);
2431 e = EDGE_SUCC (new_exit_bb, 0);
2433 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2435 tree new_res;
2436 orig_phi = gsi.phi ();
2437 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2438 new_phi = create_phi_node (new_res, new_exit_bb);
2439 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2440 add_phi_arg (new_phi, arg, new_exit_e,
2441 gimple_phi_arg_location_from_edge (orig_phi, e));
2442 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2446 /* End loop-exit-fixes after versioning. */
2448 if (cond_expr_stmt_list)
2450 cond_exp_gsi = gsi_last_bb (condition_bb);
2451 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2452 GSI_SAME_STMT);
2454 update_ssa (TODO_update_ssa);