gcc/tree-vect-loop-manip.c
1 /* Vectorizer Specific Loop Manipulations
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "fold-const.h"
32 #include "cfganal.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "tree-cfg.h"
37 #include "tree-ssa-loop-manip.h"
38 #include "tree-into-ssa.h"
39 #include "tree-ssa.h"
40 #include "cfgloop.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "tree-ssa-loop-ivopts.h"
45 /*************************************************************************
46 Simple Loop Peeling Utilities
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
52 /* Renames the use *OP_P. */
54 static void
55 rename_use_op (use_operand_p op_p)
57 tree new_name;
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
62 new_name = get_current_def (USE_FROM_PTR (op_p));
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
68 /* An ordinary ssa name defined in the loop. */
70 SET_USE (op_p, new_name);
74 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
78 static void
79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
81 gimple *stmt;
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
87 struct loop *outer_loop = NULL;
89 if (rename_from_outer_loop)
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
103 FOR_EACH_EDGE (e, ei, bb->preds)
105 if (!flow_bb_inside_loop_p (loop, e->src))
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
111 if (outer_loop->inner->next)
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
120 else
121 continue;
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
131 struct adjust_info
133 tree from, to;
134 basic_block bb;
137 /* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
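   /* Illustrative note (editor's addition, not part of the original
      comment): if two substitutions x_1 -> x_2 and, scheduled later,
      x_1 -> x_3 both apply to debug uses of x_1, popping the stack LIFO
      applies the later (closer) x_1 -> x_3 to the uses it dominates
      first; the earlier x_1 -> x_2 then still finds the remaining uses
      of x_1.  Applied FIFO, x_1 -> x_2 could rewrite all such uses up
      front and the closer substitution would find nothing left to
      adjust.  */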
143 static vec<adjust_info, va_heap> adjust_vec;
145 /* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
149 static void
150 adjust_debug_stmts_now (adjust_info *ai)
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
156 gimple *stmt;
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
165 use_operand_p use_p;
166 basic_block bbuse;
168 if (!is_gimple_debug (stmt))
169 continue;
171 gcc_assert (gimple_debug_bind_p (stmt));
173 bbuse = gimple_bb (stmt);
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
192 /* Adjust debug stmts as scheduled before. */
194 static void
195 adjust_vec_debug_stmts (void)
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
200 gcc_assert (adjust_vec.exists ());
202 while (!adjust_vec.is_empty ())
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
209 /* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
214 static void
215 adjust_debug_stmts (tree from, tree to, basic_block bb)
217 adjust_info ai;
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
222 && ! virtual_operand_p (from))
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
230 else
231 adjust_debug_stmts_now (&ai);
235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
240 static void
241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
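/* Illustrative sketch (editor's addition, not in the original source):
   conceptually the transformation turns

       do { body; } while (orig_cond);

   into

       i = 0;
       do { body; i = i + 1; } while (i < NITERS);

   where the new exit test is emitted as "i >= NITERS" or "i < NITERS",
   depending on whether the exit edge is the true or the false edge of
   the old condition.  */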
257 void
258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
260 tree indx_before_incr, indx_after_incr;
261 gcond *cond_stmt;
262 gcond *orig_cond;
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
269 source_location loop_loc;
270 enum tree_code code;
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
294 free_stmt_vec_info (orig_cond);
296 loop_loc = find_loop_location (loop);
297 if (dump_enabled_p ())
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest from those
 312 edges ensure that TO->dest PHI arguments have current_def set
 313 to that of the corresponding argument in FROM->dest. */
315 static void
316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
318 gimple_stmt_iterator gsi_from, gsi_to;
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
330 gsi_next (&gsi_from);
331 continue;
333 if (virtual_operand_p (to_arg))
335 gsi_next (&gsi_to);
336 continue;
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
357 /* Given LOOP this function generates a new copy of it and puts it
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
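/* Illustrative sketch (editor's addition): when E is the exit edge of
   LOOP the result is, roughly,

       preheader -> LOOP -> copy_preheader -> LOOP_COPY -> old exit dest

   and when E is the preheader edge the copy is placed in front instead:

       old preheader -> LOOP_COPY -> LOOP -> old exit dest

   Forwarder blocks created while rewiring the edges are partly cleaned
   up again below.  */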
363 struct loop *
364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
367 struct loop *new_loop;
368 basic_block *new_bbs, *bbs, *pbbs;
369 bool at_exit;
370 bool was_imm_dom;
371 basic_block exit_dest;
372 edge exit, new_exit;
373 bool duplicate_outer_loop = false;
375 exit = single_exit (loop);
376 at_exit = (e == exit);
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
389 /* Check whether duplication is possible. */
390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
392 free (bbs);
393 return NULL;
396 /* Generate new loop structure. */
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
400 exit_dest = exit->dest;
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
403 true : false);
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
409 edge entry_e = single_pred_edge (preheader);
410 bbs[0] = preheader;
411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
415 &exit, 1, &new_exit, NULL,
416 at_exit ? loop->latch : e->src, true);
417 exit = single_exit (loop);
418 basic_block new_preheader = new_bbs[0];
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
422 if (scalar_loop != loop)
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
433 EDGE_SUCC (loop->latch, 0));
436 if (at_exit) /* Add the loop copy at exit. */
438 if (scalar_loop != loop)
440 gphi_iterator gsi;
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
446 gphi *phi = gsi.phi ();
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
457 if (was_imm_dom || duplicate_outer_loop)
458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
464 delete_basic_block (preheader);
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
468 else /* Add the copy at entry. */
470 if (scalar_loop != loop)
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
502 if (scalar_loop != loop)
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
506 gphi_iterator gsi_orig, gsi_new;
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
525 free (new_bbs);
526 free (bbs);
528 checking_verify_dominators (CDI_DOMINATORS);
530 return new_loop;
534 /* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
537 skip the loop. PROBABILITY is the skip edge's probability. Mark the
538 new edge as irreducible if IRREDUCIBLE_P is true. */
540 static edge
541 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
542 basic_block guard_to, basic_block dom_bb,
543 int probability, bool irreducible_p)
545 gimple_stmt_iterator gsi;
546 edge new_e, enter_e;
547 gcond *cond_stmt;
548 gimple_seq gimplify_stmt_list = NULL;
550 enter_e = EDGE_SUCC (guard_bb, 0);
551 enter_e->flags &= ~EDGE_FALLTHRU;
552 enter_e->flags |= EDGE_FALSE_VALUE;
553 gsi = gsi_last_bb (guard_bb);
555 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
556 NULL_TREE);
557 if (gimplify_stmt_list)
558 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
560 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
561 gsi = gsi_last_bb (guard_bb);
562 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
564 /* Add new edge to connect guard block to the merge/loop-exit block. */
565 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
567 new_e->count = guard_bb->count;
568 new_e->probability = probability;
569 new_e->count = apply_probability (enter_e->count, probability);
570 if (irreducible_p)
571 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
573 enter_e->count -= new_e->count;
574 enter_e->probability = inverse_probability (probability);
575 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
577 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
578 if (enter_e->dest->loop_father->header == enter_e->dest)
579 split_edge (enter_e);
581 return new_e;
585 /* This function verifies that the following restrictions apply to LOOP:
 586 (1) it consists of exactly 2 basic blocks - a header and an empty
 587 latch - for an innermost loop, and of 5 basic blocks for an outer loop.
588 (2) it is single entry, single exit
589 (3) its exit condition is the last stmt in the header
590 (4) E is the entry/exit edge of LOOP.
593 bool
594 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
596 edge exit_e = single_exit (loop);
597 edge entry_e = loop_preheader_edge (loop);
598 gcond *orig_cond = get_loop_exit_condition (loop);
599 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
600 unsigned int num_bb = loop->inner? 5 : 2;
602 /* All loops have an outer scope; the only case loop->outer is NULL is for
603 the function itself. */
604 if (!loop_outer (loop)
605 || loop->num_nodes != num_bb
606 || !empty_block_p (loop->latch)
607 || !single_exit (loop)
608 /* Verify that new loop exit condition can be trivially modified. */
609 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
610 || (e != exit_e && e != entry_e))
611 return false;
613 return true;
616 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
617 in the exit bb and rename all the uses after the loop. This simplifies
618 the *guard[12] routines, which assume loop closed SSA form for all PHIs
619 (but normally loop closed SSA form doesn't require virtual PHIs to be
 620 in the same form). Doing this early simplifies checking which
 621 uses should be renamed. */
623 static void
624 create_lcssa_for_virtual_phi (struct loop *loop)
626 gphi_iterator gsi;
627 edge exit_e = single_exit (loop);
629 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
630 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
632 gphi *phi = gsi.phi ();
633 for (gsi = gsi_start_phis (exit_e->dest);
634 !gsi_end_p (gsi); gsi_next (&gsi))
635 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
636 break;
637 if (gsi_end_p (gsi))
639 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
640 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
641 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
642 imm_use_iterator imm_iter;
643 gimple *stmt;
644 use_operand_p use_p;
646 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
647 gimple_phi_set_result (new_phi, new_vop);
648 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
649 if (stmt != new_phi
650 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
651 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
652 SET_USE (use_p, new_vop);
654 break;
 659 /* Function find_loop_location.
661 Extract the location of the loop in the source code.
662 If the loop is not well formed for vectorization, an estimated
663 location is calculated.
 664 Return the loop location on success and UNKNOWN_LOCATION if not. */
666 source_location
667 find_loop_location (struct loop *loop)
669 gimple *stmt = NULL;
670 basic_block bb;
671 gimple_stmt_iterator si;
673 if (!loop)
674 return UNKNOWN_LOCATION;
676 stmt = get_loop_exit_condition (loop);
678 if (stmt
679 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
680 return gimple_location (stmt);
682 /* If we got here the loop is probably not "well formed",
 683 try to estimate the loop location.  */
685 if (!loop->header)
686 return UNKNOWN_LOCATION;
688 bb = loop->header;
690 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
692 stmt = gsi_stmt (si);
693 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
694 return gimple_location (stmt);
697 return UNKNOWN_LOCATION;
700 /* Return true if PHI defines an IV of the loop to be vectorized. */
702 static bool
703 iv_phi_p (gphi *phi)
705 if (virtual_operand_p (PHI_RESULT (phi)))
706 return false;
708 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
709 gcc_assert (stmt_info != NULL);
710 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
711 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
712 return false;
714 return true;
717 /* Function vect_can_advance_ivs_p
719 In case the number of iterations that LOOP iterates is unknown at compile
720 time, an epilog loop will be generated, and the loop induction variables
721 (IVs) will be "advanced" to the value they are supposed to take just before
722 the epilog loop. Here we check that the access function of the loop IVs
723 and the expression that represents the loop bound are simple enough.
724 These restrictions will be relaxed in the future. */
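/* Illustrative note (editor's addition): an IV such as
       i_1 = PHI <0, i_2>;  i_2 = i_1 + 4;
   has evolution part "4", which is loop invariant and not itself a
   chrec, so it passes the checks below; an IV whose step is again an
   induction (a polynomial chrec, e.g. {1, +, 1}_1 as the step) is
   rejected.  */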
726 bool
727 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
729 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
730 basic_block bb = loop->header;
731 gphi_iterator gsi;
733 /* Analyze phi functions of the loop header. */
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
737 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
739 tree evolution_part;
741 gphi *phi = gsi.phi ();
742 if (dump_enabled_p ())
744 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
745 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
748 /* Skip virtual phi's. The data dependences that are associated with
749 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
751 Skip reduction phis. */
752 if (!iv_phi_p (phi))
754 if (dump_enabled_p ())
755 dump_printf_loc (MSG_NOTE, vect_location,
756 "reduc or virtual phi. skip.\n");
757 continue;
760 /* Analyze the evolution function. */
762 evolution_part
763 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
764 if (evolution_part == NULL_TREE)
766 if (dump_enabled_p ())
767 dump_printf (MSG_MISSED_OPTIMIZATION,
768 "No access function or evolution.\n");
769 return false;
772 /* FORNOW: We do not transform initial conditions of IVs
773 which evolution functions are not invariants in the loop. */
775 if (!expr_invariant_in_loop_p (loop, evolution_part))
777 if (dump_enabled_p ())
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
779 "evolution not invariant in loop.\n");
780 return false;
783 /* FORNOW: We do not transform initial conditions of IVs
784 which evolution functions are a polynomial of degree >= 2. */
786 if (tree_is_chrec (evolution_part))
788 if (dump_enabled_p ())
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
790 "evolution is chrec.\n");
791 return false;
795 return true;
799 /* Function vect_update_ivs_after_vectorizer.
801 "Advance" the induction variables of LOOP to the value they should take
802 after the execution of LOOP. This is currently necessary because the
803 vectorizer does not handle induction variables that are used after the
804 loop. Such a situation occurs when the last iterations of LOOP are
805 peeled, because:
806 1. We introduced new uses after LOOP for IVs that were not originally used
807 after LOOP: the IVs of LOOP are now used by an epilog loop.
808 2. LOOP is going to be vectorized; this means that it will iterate N/VF
809 times, whereas the loop IVs should be bumped N times.
811 Input:
812 - LOOP - a loop that is going to be vectorized. The last few iterations
813 of LOOP were peeled.
814 - NITERS - the number of iterations that LOOP executes (before it is
815 vectorized). i.e, the number of times the ivs should be bumped.
816 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
817 coming out from LOOP on which there are uses of the LOOP ivs
818 (this is the path from LOOP->exit to epilog_loop->preheader).
820 The new definitions of the ivs are placed in LOOP->exit.
821 The phi args associated with the edge UPDATE_E in the bb
822 UPDATE_E->dest are updated accordingly.
824 Assumption 1: Like the rest of the vectorizer, this function assumes
825 a single loop exit that has a single predecessor.
827 Assumption 2: The phi nodes in the LOOP header and in update_bb are
828 organized in the same order.
830 Assumption 3: The access function of the ivs is simple enough (see
831 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
833 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
834 coming out of LOOP on which the ivs of LOOP are used (this is the path
835 that leads to the epilog loop; other paths skip the epilog loop). This
836 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
837 needs to have its phis updated.
840 static void
841 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
842 tree niters, edge update_e)
844 gphi_iterator gsi, gsi1;
845 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
846 basic_block update_bb = update_e->dest;
847 basic_block exit_bb = single_exit (loop)->dest;
849 /* Make sure there exists a single-predecessor exit bb: */
850 gcc_assert (single_pred_p (exit_bb));
851 gcc_assert (single_succ_edge (exit_bb) == update_e);
853 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
854 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
855 gsi_next (&gsi), gsi_next (&gsi1))
857 tree init_expr;
858 tree step_expr, off;
859 tree type;
860 tree var, ni, ni_name;
861 gimple_stmt_iterator last_gsi;
863 gphi *phi = gsi.phi ();
864 gphi *phi1 = gsi1.phi ();
865 if (dump_enabled_p ())
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "vect_update_ivs_after_vectorizer: phi: ");
869 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
872 /* Skip reduction and virtual phis. */
873 if (!iv_phi_p (phi))
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE, vect_location,
877 "reduc or virtual phi. skip.\n");
878 continue;
881 type = TREE_TYPE (gimple_phi_result (phi));
882 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
883 step_expr = unshare_expr (step_expr);
885 /* FORNOW: We do not support IVs whose evolution function is a polynomial
886 of degree >= 2 or exponential. */
887 gcc_assert (!tree_is_chrec (step_expr));
889 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
891 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
892 fold_convert (TREE_TYPE (step_expr), niters),
893 step_expr);
894 if (POINTER_TYPE_P (type))
895 ni = fold_build_pointer_plus (init_expr, off);
896 else
897 ni = fold_build2 (PLUS_EXPR, type,
898 init_expr, fold_convert (type, off));
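      /* Worked example (editor's addition, names are hypothetical): for
	 an integer IV with init_expr = n_0, step_expr = 4 and niters = 16
	 this computes off = 4 * 16 = 64 and ni = n_0 + 64, i.e. the value
	 the IV would have after the 16 peeled-off scalar iterations; for
	 a pointer IV the same offset is added with
	 fold_build_pointer_plus.  */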
900 var = create_tmp_var (type, "tmp");
902 last_gsi = gsi_last_bb (exit_bb);
903 gimple_seq new_stmts = NULL;
904 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
905 /* Exit_bb shouldn't be empty. */
906 if (!gsi_end_p (last_gsi))
907 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
908 else
909 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
911 /* Fix phi expressions in the successor bb. */
912 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
916 /* Function vect_gen_prolog_loop_niters
918 Generate the number of iterations which should be peeled as prolog for the
919 loop represented by LOOP_VINFO. It is calculated as the misalignment of
920 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
921 As a result, after the execution of this loop, the data reference DR will
922 refer to an aligned location. The following computation is generated:
924 If the misalignment of DR is known at compile time:
 925 addr_mis = DR_MISALIGNMENT (dr);
926 Else, compute address misalignment in bytes:
927 addr_mis = addr & (vectype_align - 1)
929 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
931 (elem_size = element type size; an element is the scalar element whose type
932 is the inner type of the vectype)
934 The computations will be emitted at the end of BB. We also compute and
935 store upper bound (included) of the result in BOUND.
937 When the step of the data-ref in the loop is not 1 (as in interleaved data
938 and SLP), the number of iterations of the prolog must be divided by the step
939 (which is equal to the size of interleaved group).
941 The above formulas assume that VF == number of elements in the vector. This
942 may not hold when there are multiple-types in the loop.
943 In this case, for some data-references in the loop the VF does not represent
944 the number of elements that fit in the vector. Therefore, instead of VF we
945 use TYPE_VECTOR_SUBPARTS. */
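/* Worked example (editor's addition, values are hypothetical): with a
   16-byte vector of 4-byte ints (VF = nelements = 4, elem_size = 4,
   vectype_align = 16), a start address whose misalignment is 8 bytes,
   and step 1:

       addr_mis      = addr & 15          =  8
       elem_misalign = 8 >> log2(4)       =  2
       prolog_niters = ((4 - 2) & 3) / 1  =  2

   i.e. peeling two scalar iterations advances the access by 8 bytes and
   makes DR 16-byte aligned.  */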
947 static tree
948 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
949 basic_block bb, int *bound)
951 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
953 tree var;
954 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
955 gimple_seq stmts = NULL, new_stmts = NULL;
956 tree iters, iters_name;
957 gimple *dr_stmt = DR_STMT (dr);
958 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
959 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
960 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
961 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
963 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
965 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "known peeling = %d.\n", npeel);
971 iters = build_int_cst (niters_type, npeel);
972 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
974 else
976 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
977 tree offset = negative
978 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
979 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
980 &stmts, offset, loop);
981 tree type = unsigned_type_for (TREE_TYPE (start_addr));
982 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
983 HOST_WIDE_INT elem_size =
984 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
985 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
986 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
987 tree nelements_tree = build_int_cst (type, nelements);
988 tree byte_misalign;
989 tree elem_misalign;
991 /* Create: byte_misalign = addr & (vectype_align - 1) */
992 byte_misalign =
993 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
994 vectype_align_minus_1);
996 /* Create: elem_misalign = byte_misalign / element_size */
997 elem_misalign =
998 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1000 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
1001 if (negative)
1002 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1003 else
1004 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
1005 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1006 iters = fold_convert (niters_type, iters);
1007 *bound = nelements - 1;
1010 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "niters for prolog loop: ");
1014 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1015 dump_printf (MSG_NOTE, "\n");
1018 var = create_tmp_var (niters_type, "prolog_loop_niters");
1019 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1021 if (new_stmts)
1022 gimple_seq_add_seq (&stmts, new_stmts);
1023 if (stmts)
1025 gcc_assert (single_succ_p (bb));
1026 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1027 if (gsi_end_p (gsi))
1028 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1029 else
1030 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1032 return iters_name;
1036 /* Function vect_update_init_of_dr
1038 NITERS iterations were peeled from LOOP. DR represents a data reference
1039 in LOOP. This function updates the information recorded in DR to
1040 account for the fact that the first NITERS iterations had already been
1041 executed. Specifically, it updates the OFFSET field of DR. */
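/* Worked example (editor's addition): for a data reference with
   DR_STEP = 4 (bytes per iteration), after peeling niters = 3 prolog
   iterations DR_OFFSET is increased by 3 * 4 = 12 bytes, so the
   remaining loop starts at the element the fourth original iteration
   would have accessed.  */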
1043 static void
1044 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1046 tree offset = DR_OFFSET (dr);
1048 niters = fold_build2 (MULT_EXPR, sizetype,
1049 fold_convert (sizetype, niters),
1050 fold_convert (sizetype, DR_STEP (dr)));
1051 offset = fold_build2 (PLUS_EXPR, sizetype,
1052 fold_convert (sizetype, offset), niters);
1053 DR_OFFSET (dr) = offset;
1057 /* Function vect_update_inits_of_drs
1059 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1060 This function updates the information recorded for the data references in
1061 the loop to account for the fact that the first NITERS iterations had
1062 already been executed. Specifically, it updates the initial_condition of
1063 the access_function of all the data_references in the loop. */
1065 static void
1066 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1068 unsigned int i;
1069 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1070 struct data_reference *dr;
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE, vect_location,
1074 "=== vect_update_inits_of_dr ===\n");
1076 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1077 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1079 gimple_seq seq;
1080 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1081 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1083 niters = fold_convert (sizetype, niters);
1084 niters = force_gimple_operand (niters, &seq, false, var);
1085 if (seq)
1087 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1088 gcc_assert (!new_bb);
1092 FOR_EACH_VEC_ELT (datarefs, i, dr)
1093 vect_update_init_of_dr (dr, niters);
1097 /* This function builds ni_name = number of iterations. Statements
1098 are emitted on the loop preheader edge. */
1100 tree
1101 vect_build_loop_niters (loop_vec_info loop_vinfo)
1103 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1104 if (TREE_CODE (ni) == INTEGER_CST)
1105 return ni;
1106 else
1108 tree ni_name, var;
1109 gimple_seq stmts = NULL;
1110 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1112 var = create_tmp_var (TREE_TYPE (ni), "niters");
1113 ni_name = force_gimple_operand (ni, &stmts, false, var);
1114 if (stmts)
1115 gsi_insert_seq_on_edge_immediate (pe, stmts);
1117 return ni_name;
1121 /* Calculate the number of iterations above which vectorized loop will be
 1122 preferred to the scalar loop. NITERS_PROLOG is the number of iterations
1123 of prolog loop. If it's integer const, the integer number is also passed
1124 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1125 number of iterations of prolog loop. VFM1 is vector factor minus one.
1126 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1127 (rather than vectorized) loop will be executed. This function stores
1128 upper bound (included) of the result in BOUND_SCALAR. */
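/* Worked example (editor's addition, numbers are hypothetical): with
   VFM1 = 3 (VF = 4) and an unknown prolog count bounded by
   BOUND_PROLOG = 3, the scalar loop runs at most niters_prolog + 3
   iterations.  With CHECK_PROFITABILITY and TH = 10, th is first
   decremented to 9; since 9 >= 3 + 3 the function simply returns the
   constant 9 and sets BOUND_SCALAR to 9.  */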
1130 static tree
1131 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1132 int bound_prolog, int vfm1, int th,
1133 int *bound_scalar, bool check_profitability)
1135 tree type = TREE_TYPE (niters_prolog);
1136 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1137 build_int_cst (type, vfm1));
1139 *bound_scalar = vfm1 + bound_prolog;
1140 if (check_profitability)
1142 /* TH indicates the minimum niters of vectorized loop, while we
1143 compute the maximum niters of scalar loop. */
1144 th--;
1145 /* Peeling for constant times. */
1146 if (int_niters_prolog >= 0)
1148 *bound_scalar = (int_niters_prolog + vfm1 < th
1149 ? th
1150 : vfm1 + int_niters_prolog);
1151 return build_int_cst (type, *bound_scalar);
1153 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
 1154 bound (included) of niters of prolog loop. */
1155 if (th >= vfm1 + bound_prolog)
1157 *bound_scalar = th;
1158 return build_int_cst (type, th);
1160 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1161 else if (th > vfm1)
1162 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1164 return niters;
1167 /* This function generates the following statements:
1169 niters = number of iterations loop executes (after peeling)
1170 niters_vector = niters / vf
1172 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1173 true if NITERS doesn't overflow. */
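/* Worked example (editor's addition): with vf = 4 and niters = 10, the
   no-overflow form gives niters_vector = 10 >> 2 = 2, and the
   overflow-safe form gives ((10 - 4) >> 2) + 1 = 2 as well; the latter
   remains correct even when niters (latch executions + 1) has wrapped
   around to 0, because the ratio is known to be at least one.  */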
1175 void
1176 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1177 tree *niters_vector_ptr, bool niters_no_overflow)
1179 tree ni_minus_gap, var;
1180 tree niters_vector;
1181 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1182 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1183 tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1185 /* If epilogue loop is required because of data accesses with gaps, we
1186 subtract one iteration from the total number of iterations here for
1187 correct calculation of RATIO. */
1188 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1190 ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1191 niters,
1192 build_one_cst (TREE_TYPE (niters)));
1193 if (!is_gimple_val (ni_minus_gap))
1195 var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1196 gimple *stmts = NULL;
1197 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1198 true, var);
1199 gsi_insert_seq_on_edge_immediate (pe, stmts);
1202 else
1203 ni_minus_gap = niters;
1205 /* Create: niters >> log2(vf) */
1206 /* If it's known that niters == number of latch executions + 1 doesn't
1207 overflow, we can generate niters >> log2(vf); otherwise we generate
1208 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1209 will be at least one. */
1210 if (niters_no_overflow)
1211 niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1212 ni_minus_gap, log_vf);
1213 else
1214 niters_vector
1215 = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1216 fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1217 fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1218 ni_minus_gap,
1219 build_int_cst
1220 (TREE_TYPE (niters), vf)),
1221 log_vf),
1222 build_int_cst (TREE_TYPE (niters), 1));
1224 if (!is_gimple_val (niters_vector))
1226 var = create_tmp_var (TREE_TYPE (niters), "bnd");
1227 gimple *stmts = NULL;
1228 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1229 gsi_insert_seq_on_edge_immediate (pe, stmts);
1231 *niters_vector_ptr = niters_vector;
1233 return;
1236 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1237 loop specified by LOOP_VINFO after vectorization, compute the number
1238 of iterations before vectorization (niters_vector * vf) and store it
1239 to NITERS_VECTOR_MULT_VF_PTR. */
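/* Worked example (editor's addition): with vf = 4 and niters_vector = 2
   this emits niters_vector_mult_vf = 2 << 2 = 8, the number of scalar
   iterations covered by the vector loop.  */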
1241 static void
1242 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1243 tree niters_vector,
1244 tree *niters_vector_mult_vf_ptr)
1246 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1248 tree type = TREE_TYPE (niters_vector);
1249 tree log_vf = build_int_cst (type, exact_log2 (vf));
1250 basic_block exit_bb = single_exit (loop)->dest;
1252 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1253 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1254 niters_vector, log_vf);
1255 if (!is_gimple_val (niters_vector_mult_vf))
1257 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1258 gimple_seq stmts = NULL;
1259 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1260 &stmts, true, var);
1261 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1262 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1264 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
 1267 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
1268 from SECOND/FIRST and puts it at the original loop's preheader/exit
1269 edge, the two loops are arranged as below:
1271 preheader_a:
1272 first_loop:
1273 header_a:
1274 i_1 = PHI<i_0, i_2>;
1276 i_2 = i_1 + 1;
1277 if (cond_a)
1278 goto latch_a;
1279 else
1280 goto between_bb;
1281 latch_a:
1282 goto header_a;
1284 between_bb:
1285 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1287 second_loop:
1288 header_b:
1289 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1290 or with i_2 if no LCSSA phi is created
1291 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1293 i_4 = i_3 + 1;
1294 if (cond_b)
1295 goto latch_b;
1296 else
1297 goto exit_bb;
1298 latch_b:
1299 goto header_b;
1301 exit_bb:
1303 This function creates loop closed SSA for the first loop; update the
1304 second loop's PHI nodes by replacing argument on incoming edge with the
 1305 result of newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
 1306 is false, loop closed SSA PHIs will only be created for non-IV PHIs of
 1307 the first loop.
1309 This function assumes exit bb of the first loop is preheader bb of the
1310 second loop, i.e, between_bb in the example code. With PHIs updated,
1311 the second loop will execute rest iterations of the first. */
1313 static void
1314 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1315 struct loop *first, struct loop *second,
1316 bool create_lcssa_for_iv_phis)
1318 gphi_iterator gsi_update, gsi_orig;
1319 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1321 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1322 edge second_preheader_e = loop_preheader_edge (second);
1323 basic_block between_bb = single_exit (first)->dest;
1325 gcc_assert (between_bb == second_preheader_e->src);
1326 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1327 /* Either the first loop or the second is the loop to be vectorized. */
1328 gcc_assert (loop == first || loop == second);
1330 for (gsi_orig = gsi_start_phis (first->header),
1331 gsi_update = gsi_start_phis (second->header);
1332 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1333 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1335 gphi *orig_phi = gsi_orig.phi ();
1336 gphi *update_phi = gsi_update.phi ();
1338 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1339 /* Generate lcssa PHI node for the first loop. */
1340 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1341 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1343 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1344 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1345 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1346 arg = new_res;
1349 /* Update PHI node in the second loop by replacing arg on the loop's
1350 incoming edge. */
1351 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1355 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1356 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1357 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1358 appear like below:
1360 guard_bb:
1361 if (cond)
1362 goto merge_bb;
1363 else
1364 goto skip_loop;
1366 skip_loop:
1367 header_a:
1368 i_1 = PHI<i_0, i_2>;
1370 i_2 = i_1 + 1;
1371 if (cond_a)
1372 goto latch_a;
1373 else
1374 goto exit_a;
1375 latch_a:
1376 goto header_a;
1378 exit_a:
1379 i_5 = PHI<i_2>;
1381 merge_bb:
1382 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1384 update_loop:
1385 header_b:
1386 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1388 i_4 = i_3 + 1;
1389 if (cond_b)
1390 goto latch_b;
1391 else
1392 goto exit_bb;
1393 latch_b:
1394 goto header_b;
1396 exit_bb:
1398 This function creates PHI nodes at merge_bb and replaces the use of i_5
 1399 in the update_loop's PHI node with the new PHI's result. */
1401 static void
1402 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1403 struct loop *update_loop,
1404 edge guard_edge, edge merge_edge)
1406 source_location merge_loc, guard_loc;
1407 edge orig_e = loop_preheader_edge (skip_loop);
1408 edge update_e = loop_preheader_edge (update_loop);
1409 gphi_iterator gsi_orig, gsi_update;
1411 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1412 gsi_update = gsi_start_phis (update_loop->header));
1413 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1414 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1416 gphi *orig_phi = gsi_orig.phi ();
1417 gphi *update_phi = gsi_update.phi ();
1419 /* Generate new phi node at merge bb of the guard. */
1420 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1421 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1423 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1424 args in NEW_PHI for these edges. */
1425 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1426 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1427 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1428 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1429 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1430 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1432 /* Update phi in UPDATE_PHI. */
1433 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1437 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1438 this function searches for the corresponding lcssa phi node in exit
1439 bb of LOOP. If it is found, return the phi result; otherwise return
1440 NULL. */
1442 static tree
1443 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1444 gphi *lcssa_phi)
1446 gphi_iterator gsi;
1447 edge e = single_exit (loop);
1449 gcc_assert (single_pred_p (e->dest));
1450 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1452 gphi *phi = gsi.phi ();
1453 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1454 PHI_ARG_DEF (lcssa_phi, 0), 0))
1455 return PHI_RESULT (phi);
1457 return NULL_TREE;
1460 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1461 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1462 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1463 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1464 The CFG looks like:
1466 loop:
1467 header_a:
1468 i_1 = PHI<i_0, i_2>;
1470 i_2 = i_1 + 1;
1471 if (cond_a)
1472 goto latch_a;
1473 else
1474 goto exit_a;
1475 latch_a:
1476 goto header_a;
1478 exit_a:
1480 guard_bb:
1481 if (cond)
1482 goto merge_bb;
1483 else
1484 goto epilog_loop;
1486 ;; fall_through_bb
1488 epilog_loop:
1489 header_b:
1490 i_3 = PHI<i_2, i_4>;
1492 i_4 = i_3 + 1;
1493 if (cond_b)
1494 goto latch_b;
1495 else
1496 goto merge_bb;
1497 latch_b:
1498 goto header_b;
1500 merge_bb:
1501 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1503 exit_bb:
1504 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
 1506 For each name used outside EPILOG (i.e. for each name that has an lcssa
1507 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1508 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1509 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1510 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1511 in exit_bb will also be updated. */
1513 static void
1514 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1515 edge guard_edge, edge merge_edge)
1517 gphi_iterator gsi;
1518 basic_block merge_bb = guard_edge->dest;
1520 gcc_assert (single_succ_p (merge_bb));
1521 edge e = single_succ_edge (merge_bb);
1522 basic_block exit_bb = e->dest;
1523 gcc_assert (single_pred_p (exit_bb));
1524 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1526 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1528 gphi *update_phi = gsi.phi ();
1529 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1530 /* This loop-closed-phi actually doesn't represent a use out of the
1531 loop - the phi arg is a constant. */
1532 if (TREE_CODE (old_arg) != SSA_NAME)
1533 continue;
1535 tree merge_arg = get_current_def (old_arg);
1536 if (!merge_arg)
1537 merge_arg = old_arg;
1539 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1540 /* If the var is live after loop but not a reduction, we simply
1541 use the old arg. */
1542 if (!guard_arg)
1543 guard_arg = old_arg;
1545 /* Create new phi node in MERGE_BB: */
1546 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1547 gphi *merge_phi = create_phi_node (new_res, merge_bb);
 1549 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set
1550 the two PHI args in merge_phi for these edges. */
1551 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1552 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1554 /* Update the original phi in exit_bb. */
1555 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1559 /* EPILOG loop is duplicated from the original loop for vectorizing,
1560 the arg of its loop closed ssa PHI needs to be updated. */
1562 static void
1563 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1565 gphi_iterator gsi;
1566 basic_block exit_bb = single_exit (epilog)->dest;
1568 gcc_assert (single_pred_p (exit_bb));
1569 edge e = EDGE_PRED (exit_bb, 0);
1570 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1571 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1574 /* Function vect_do_peeling.
1576 Input:
1577 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1579 preheader:
1580 LOOP:
1581 header_bb:
1582 loop_body
1583 if (exit_loop_cond) goto exit_bb
1584 else goto header_bb
1585 exit_bb:
1587 - NITERS: The number of iterations of the loop.
1588 - NITERSM1: The number of iterations of the loop's latch.
1589 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1590 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1591 CHECK_PROFITABILITY is true.
1592 Output:
1593 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1595 This function peels prolog and epilog from the loop, adds guards skipping
1596 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1597 would look like:
1599 guard_bb_1:
1600 if (prefer_scalar_loop) goto merge_bb_1
1601 else goto guard_bb_2
1603 guard_bb_2:
1604 if (skip_prolog) goto merge_bb_2
1605 else goto prolog_preheader
1607 prolog_preheader:
1608 PROLOG:
1609 prolog_header_bb:
1610 prolog_body
1611 if (exit_prolog_cond) goto prolog_exit_bb
1612 else goto prolog_header_bb
1613 prolog_exit_bb:
1615 merge_bb_2:
1617 vector_preheader:
1618 VECTOR LOOP:
1619 vector_header_bb:
1620 vector_body
1621 if (exit_vector_cond) goto vector_exit_bb
1622 else goto vector_header_bb
1623 vector_exit_bb:
1625 guard_bb_3:
1626 if (skip_epilog) goto merge_bb_3
1627 else goto epilog_preheader
1629 merge_bb_1:
1631 epilog_preheader:
1632 EPILOG:
1633 epilog_header_bb:
1634 epilog_body
1635 if (exit_epilog_cond) goto merge_bb_3
1636 else goto epilog_header_bb
1638 merge_bb_3:
 1640 Note this function peels the prolog and epilog, and adds the guards,
 1641 only when necessary.
1642 Returns created epilogue or NULL.
1644 TODO: Guard for prefer_scalar_loop should be emitted along with
1645 versioning conditions if loop versioning is needed. */
1648 struct loop *
1649 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1650 tree *niters_vector, int th, bool check_profitability,
1651 bool niters_no_overflow)
1653 edge e, guard_e;
1654 tree type = TREE_TYPE (niters), guard_cond;
1655 basic_block guard_bb, guard_to;
1656 int prob_prolog, prob_vector, prob_epilog;
1657 int bound_prolog = 0, bound_scalar = 0, bound = 0;
1658 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1659 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1660 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1661 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1663 if (!prolog_peeling && !epilog_peeling)
1664 return NULL;
1666 prob_vector = 9 * REG_BR_PROB_BASE / 10;
1667 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1668 vf = 3;
1669 prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1670 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1672 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1673 struct loop *first_loop = loop;
1674 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1675 create_lcssa_for_virtual_phi (loop);
1676 update_ssa (TODO_update_ssa_only_virtuals);
1678 if (MAY_HAVE_DEBUG_STMTS)
1680 gcc_assert (!adjust_vec.exists ());
1681 adjust_vec.create (32);
1683 initialize_original_copy_tables ();
1685 /* Prolog loop may be skipped. */
1686 bool skip_prolog = (prolog_peeling != 0);
1687 /* Skip to epilog if scalar loop may be preferred. It's only used when
1688 we peel for epilog loop. */
1689 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1690 /* Epilog loop must be executed if the number of iterations for epilog
1691 loop is known at compile time, otherwise we need to add a check at
1692 the end of vector loop and skip to the end of epilog loop. */
1693 bool skip_epilog = (prolog_peeling < 0
1694 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1695 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1696 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1697 skip_epilog = false;
1699 /* Record the anchor bb at which guard should be placed if scalar loop
1700 may be preferred. */
1701 basic_block anchor = loop_preheader_edge (loop)->src;
1702 if (skip_vector)
1704 split_edge (loop_preheader_edge (loop));
1706 /* Due to the order in which we peel prolog and epilog, we first
1707 propagate probability to the whole loop. The purpose is to
1708 avoid adjusting probabilities of both prolog and vector loops
1709 separately. Note in this case, the probability of epilog loop
1710 needs to be scaled back later. */
1711 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
1712 scale_bbs_frequencies_int (&bb_before_loop, 1, prob_vector,
1713 REG_BR_PROB_BASE);
1714 scale_loop_profile (loop, prob_vector, bound);
1717 tree niters_prolog = build_int_cst (type, 0);
1718 source_location loop_loc = find_loop_location (loop);
1719 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1720 if (prolog_peeling)
1722 e = loop_preheader_edge (loop);
1723 if (!slpeel_can_duplicate_loop_p (loop, e))
1725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1726 "loop can't be duplicated to preheader edge.\n");
1727 gcc_unreachable ();
1729 /* Peel prolog and put it on preheader edge of loop. */
1730 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1731 if (!prolog)
1733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1734 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1735 gcc_unreachable ();
1737 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1738 first_loop = prolog;
1739 reset_original_copy_tables ();
1741 /* Generate and update the number of iterations for prolog loop. */
1742 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1743 &bound_prolog);
1744 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1746 /* Skip the prolog loop. */
1747 if (skip_prolog)
1749 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1750 niters_prolog, build_int_cst (type, 0));
1751 guard_bb = loop_preheader_edge (prolog)->src;
1752 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
1753 guard_to = split_edge (loop_preheader_edge (loop));
1754 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1755 guard_to, guard_bb,
1756 inverse_probability (prob_prolog),
1757 irred_flag);
1758 e = EDGE_PRED (guard_to, 0);
1759 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1760 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1762 scale_bbs_frequencies_int (&bb_after_prolog, 1, prob_prolog,
1763 REG_BR_PROB_BASE);
1764 scale_loop_profile (prolog, prob_prolog, bound_prolog);
1766 /* Update init address of DRs. */
1767 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1768 /* Update niters for vector loop. */
1769 LOOP_VINFO_NITERS (loop_vinfo)
1770 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1771 LOOP_VINFO_NITERSM1 (loop_vinfo)
1772 = fold_build2 (MINUS_EXPR, type,
1773 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1774 niters = vect_build_loop_niters (loop_vinfo);
1776 /* The prolog iterates at most bound_prolog times; its latch iterates at
1777 most bound_prolog - 1 times. */
1778 record_niter_bound (prolog, bound_prolog - 1, false, true);
1779 delete_update_ssa ();
1780 adjust_vec_debug_stmts ();
1781 scev_reset ();
1784 if (epilog_peeling)
1786 e = single_exit (loop);
1787 if (!slpeel_can_duplicate_loop_p (loop, e))
1789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1790 "loop can't be duplicated to exit edge.\n");
1791 gcc_unreachable ();
1793 /* Peel epilog and put it on exit edge of loop. */
1794 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1795 if (!epilog)
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1798 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1799 gcc_unreachable ();
1801 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1803 /* The scalar version of the loop may be preferred. In this case, add a
1804 guard and skip to the epilog. Note this only happens when the number
1805 of iterations of the loop is unknown at compile time; otherwise it
1806 won't be vectorized. */
1807 if (skip_vector)
1809 /* An additional epilogue iteration is peeled if a gap exists. */
1810 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1811 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1812 bound_prolog,
1813 peel_for_gaps ? vf : vf - 1,
1814 th, &bound_scalar,
1815 check_profitability);
1816 /* Build guard against NITERSM1 since NITERS may overflow. */
1817 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1818 guard_bb = anchor;
1819 guard_to = split_edge (loop_preheader_edge (epilog));
1820 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1821 guard_to, guard_bb,
1822 inverse_probability (prob_vector),
1823 irred_flag);
1824 e = EDGE_PRED (guard_to, 0);
1825 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1826 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1828 /* Simply propagate profile info from guard_bb to guard_to which is
1829 a merge point of control flow. */
1830 guard_to->frequency = guard_bb->frequency;
1831 guard_to->count = guard_bb->count;
1832 single_succ_edge (guard_to)->count = guard_to->count;
1833 /* Scale probability of epilog loop back. */
1834 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE / prob_vector;
1835 scale_loop_frequencies (epilog, scale_up, REG_BR_PROB_BASE);
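/* E.g., with prob_vector == 9/10 * REG_BR_PROB_BASE this multiplies the
   epilog frequencies by roughly 10/9, undoing the earlier scaling of the
   whole loop by prob_vector.  */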
1838 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
1839 tree niters_vector_mult_vf;
1840 /* If the loop is peeled a non-zero constant number of times, NITERS now
1841 refers to orig_niters - prolog_peeling; it won't overflow even if
1842 orig_niters overflows. */
1843 niters_no_overflow |= (prolog_peeling > 0);
1844 vect_gen_vector_loop_niters (loop_vinfo, niters,
1845 niters_vector, niters_no_overflow);
1846 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1847 &niters_vector_mult_vf);
1848 /* Update IVs of original loop as if they were advanced by
1849 niters_vector_mult_vf steps. */
1850 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1851 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1852 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1853 update_e);
1855 if (skip_epilog)
1857 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1858 niters, niters_vector_mult_vf);
1859 guard_bb = single_exit (loop)->dest;
1860 guard_to = split_edge (single_exit (epilog));
1861 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1862 skip_vector ? anchor : guard_bb,
1863 inverse_probability (prob_epilog),
1864 irred_flag);
1865 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1866 single_exit (epilog));
1867 /* Only need to handle basic block before epilog loop if it's not
1868 the guard_bb, which is the case when skip_vector is true. */
1869 if (guard_bb != bb_before_epilog)
1871 prob_epilog = (combine_probabilities (prob_vector, prob_epilog)
1872 + inverse_probability (prob_vector));
1874 scale_bbs_frequencies_int (&bb_before_epilog, 1, prob_epilog,
1875 REG_BR_PROB_BASE);
1877 scale_loop_profile (epilog, prob_epilog, bound);
1879 else
1880 slpeel_update_phi_nodes_for_lcssa (epilog);
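/* Bound on the epilog latch: without PEELING_FOR_GAPS the epilog handles
   at most vf - 1 scalar iterations, so its latch runs at most vf - 2
   times; with PEELING_FOR_GAPS one more vector iteration's worth is left
   to the epilog, raising the latch bound to vf - 1.  */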
1882 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1883 /* We share epilog loop with scalar version loop. */
1884 bound = MAX (bound, bound_scalar - 1);
1885 record_niter_bound (epilog, bound, false, true);
1887 delete_update_ssa ();
1888 adjust_vec_debug_stmts ();
1889 scev_reset ();
1891 adjust_vec.release ();
1892 free_original_copy_tables ();
1894 return epilog;
1897 /* Function vect_create_cond_for_niters_checks.
1899 Create a conditional expression that represents the run-time checks for
1900 the loop's niter. The loop is guaranteed to terminate if the run-time
1901 checks hold.
1903 Input:
1904 COND_EXPR - input conditional expression. New conditions will be chained
1905 with logical AND operation. If it is NULL, it is simply
1906 set to the niter assumptions.
1907 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
1908 assumptions to be checked at run time.
1910 Output:
1911 COND_EXPR - conditional expression.
1913 The returned COND_EXPR is the conditional expression to be used in the
1914 if statement that controls which version of the loop gets executed at
1915 runtime. */
1917 static void
1918 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1920 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1922 if (*cond_expr)
1923 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1924 *cond_expr, part_cond_expr);
1925 else
1926 *cond_expr = part_cond_expr;
1929 /* Function vect_create_cond_for_align_checks.
1931 Create a conditional expression that represents the alignment checks for
1932 all of the data references (array element references) whose alignment must be
1933 checked at runtime.
1935 Input:
1936 COND_EXPR - input conditional expression. New conditions will be chained
1937 with logical AND operation.
1938 LOOP_VINFO - two fields of the loop information are used.
1939 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1940 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1942 Output:
1943 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1944 expression.
1945 The returned value is the conditional expression to be used in the if
1946 statement that controls which version of the loop gets executed at runtime.
1948 The algorithm makes two assumptions:
1949 1) The number of bytes "n" in a vector is a power of 2.
1950 2) An address "a" is aligned if a%n is zero, and this
1951 test can be done as a&(n-1) == 0. For example, for 16
1952 byte vectors the test is a&0xf == 0. */
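/* Schematically, for two data references and 16-byte vectors the emitted
   condition is roughly
     ((addr2int0 | addr2int1) & 0xf) == 0
   where addr2int<i> is the address of the first vector of the i'th data
   reference converted to a pointer-sized integer (the temporaries built
   below).  */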
1954 static void
1955 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1956 tree *cond_expr,
1957 gimple_seq *cond_expr_stmt_list)
1959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1960 vec<gimple *> may_misalign_stmts
1961 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1962 gimple *ref_stmt;
1963 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1964 tree mask_cst;
1965 unsigned int i;
1966 tree int_ptrsize_type;
1967 char tmp_name[20];
1968 tree or_tmp_name = NULL_TREE;
1969 tree and_tmp_name;
1970 gimple *and_stmt;
1971 tree ptrsize_zero;
1972 tree part_cond_expr;
1974 /* Check that mask is one less than a power of 2, i.e., mask is
1975 all zeros followed by all ones. */
1976 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1978 int_ptrsize_type = signed_type_for (ptr_type_node);
1980 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
1981 of the first vector of the i'th data reference. */
1983 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1985 gimple_seq new_stmt_list = NULL;
1986 tree addr_base;
1987 tree addr_tmp_name;
1988 tree new_or_tmp_name;
1989 gimple *addr_stmt, *or_stmt;
1990 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1991 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1992 bool negative = tree_int_cst_compare
1993 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1994 tree offset = negative
1995 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1997 /* create: addr_tmp = (int)(address_of_first_vector) */
1998 addr_base =
1999 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2000 offset, loop);
2001 if (new_stmt_list != NULL)
2002 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2004 sprintf (tmp_name, "addr2int%d", i);
2005 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2006 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
2007 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2009 /* The addresses are ORed together. */
2011 if (or_tmp_name != NULL_TREE)
2013 /* create: or_tmp = or_tmp | addr_tmp */
2014 sprintf (tmp_name, "orptrs%d", i);
2015 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
2016 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2017 or_tmp_name, addr_tmp_name);
2018 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2019 or_tmp_name = new_or_tmp_name;
2021 else
2022 or_tmp_name = addr_tmp_name;
2024 } /* end for i */
2026 mask_cst = build_int_cst (int_ptrsize_type, mask);
2028 /* create: and_tmp = or_tmp & mask */
2029 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
2031 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2032 or_tmp_name, mask_cst);
2033 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2035 /* Make and_tmp the left operand of the conditional test against zero.
2036 If and_tmp has a nonzero bit then some address is unaligned. */
2037 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2038 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2039 and_tmp_name, ptrsize_zero);
2040 if (*cond_expr)
2041 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2042 *cond_expr, part_cond_expr);
2043 else
2044 *cond_expr = part_cond_expr;
2047 /* Given LOOP's two data references and segment lengths described by DR_A
2048 and DR_B, create an expression checking if the two address ranges intersect
2049 with each other based on the index of the two addresses. This can only be
2050 done if DR_A and DR_B refer to the same (array) object and the index
2051 is the only difference. For example:
2053                 DR_A                  DR_B
2054    data-ref     arr[i]                arr[j]
2055    base_object  arr                   arr
2056    index        {i_0, +, 1}_loop      {j_0, +, 1}_loop
2058 The addresses and their index are like:
2060   |<- ADDR_A ->|          |<- ADDR_B ->|
2061 ------------------------------------------------------->
2062   |   |   |   |   |       |   |   |   |   |
2063 ------------------------------------------------------->
2064   i_0 ...   i_0+4         j_0 ...   j_0+4
2066 We can create an expression based on the index rather than the address:
2068 (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2070 Note the evolution step of the index needs to be considered in the comparison. */
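/* Concretely, for the example above, if each segment covers 4 iterations
   and the index step is 1, the check actually emitted below is
     (i_0 + 4 <= j_0 || j_0 + 4 <= i_0)
   built from the per-dimension [min, max) index ranges computed in the
   function body.  */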
2072 static bool
2073 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
2074 const dr_with_seg_len& dr_a,
2075 const dr_with_seg_len& dr_b)
2077 if (integer_zerop (DR_STEP (dr_a.dr))
2078 || integer_zerop (DR_STEP (dr_b.dr))
2079 || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2080 return false;
2082 if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2083 return false;
2085 if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2086 return false;
2088 if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2089 return false;
2091 if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2092 return false;
2094 gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2096 bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2097 unsigned HOST_WIDE_INT abs_step
2098 = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2100 unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2101 unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2102 /* Infer the number of iterations with which the memory segment is accessed
2103 by DR. In other words, aliasing is checked if the memory segment accessed
2104 by DR_A in some iterations intersects with the memory segment accessed by
2105 DR_B in the same number of iterations.
2106 Note the segment length is a linear function of the number of iterations
2107 with DR_STEP as the coefficient. */
2108 unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2109 unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
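/* This is a ceiling division: e.g., a 16-byte segment with a 4-byte step
   is accessed over 4 iterations, and a 17-byte segment over 5.  */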
2111 unsigned int i;
2112 for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2114 tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2115 tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2116 /* Two indices must be the same if they are not scev, or not scev w.r.t.
2117 the current loop being vectorized. */
2118 if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2119 || TREE_CODE (access2) != POLYNOMIAL_CHREC
2120 || CHREC_VARIABLE (access1) != (unsigned)loop->num
2121 || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2123 if (operand_equal_p (access1, access2, 0))
2124 continue;
2126 return false;
2128 /* The two indices must have the same step. */
2129 if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2130 return false;
2132 tree idx_step = CHREC_RIGHT (access1);
2133 /* Index must have const step, otherwise DR_STEP won't be constant. */
2134 gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2135 /* Index must evaluate in the same direction as DR. */
2136 gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2138 tree min1 = CHREC_LEFT (access1);
2139 tree min2 = CHREC_LEFT (access2);
2140 if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2141 return false;
2143 /* Ideally, aliasing can be checked against the loop's control IV, but we
2144 need to prove a linear mapping between the control IV and the reference
2145 index. Although that should be true, we check against the (array)
2146 index of the data reference. Like the segment length, the index length
2147 is a linear function of the number of iterations with idx_step as
2148 the coefficient, i.e., niter_len * idx_step. */
2149 tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2150 build_int_cst (TREE_TYPE (min1),
2151 niter_len1));
2152 tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2153 build_int_cst (TREE_TYPE (min2),
2154 niter_len2));
2155 tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2156 tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2157 /* Adjust ranges for negative step. */
2158 if (neg_step)
2160 min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2161 max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2162 CHREC_LEFT (access1), idx_step);
2163 min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2164 max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2165 CHREC_LEFT (access2), idx_step);
2167 tree part_cond_expr
2168 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2169 fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2170 fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2171 if (*cond_expr)
2172 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2173 *cond_expr, part_cond_expr);
2174 else
2175 *cond_expr = part_cond_expr;
2177 return true;
2180 /* Given two data references and segment lengths described by DR_A and DR_B,
2181 create an expression checking if the two address ranges intersect with
2182 each other:
2184 ((DR_A_addr_0 + DR_A_segment_length_0 <= DR_B_addr_0)
2185 || (DR_B_addr_0 + DR_B_segment_length_0 <= DR_A_addr_0)) */
2187 static void
2188 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
2189 const dr_with_seg_len& dr_a,
2190 const dr_with_seg_len& dr_b)
2192 *cond_expr = NULL_TREE;
2193 if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
2194 return;
2196 tree segment_length_a = dr_a.seg_len;
2197 tree segment_length_b = dr_b.seg_len;
2198 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2199 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2200 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2202 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2203 offset_a, DR_INIT (dr_a.dr));
2204 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2205 offset_b, DR_INIT (dr_b.dr));
2206 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2207 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2209 tree seg_a_min = addr_base_a;
2210 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2211 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2212 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2213 [a, a+12) */
2214 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2216 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2217 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2218 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2221 tree seg_b_min = addr_base_b;
2222 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2223 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2225 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2226 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2227 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2229 *cond_expr
2230 = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2231 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2232 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2235 /* Function vect_create_cond_for_alias_checks.
2237 Create a conditional expression that represents the run-time checks for
2238 overlapping of address ranges represented by a list of data reference
2239 relations passed as input.
2241 Input:
2242 COND_EXPR - input conditional expression. New conditions will be chained
2243 with logical AND operation. If it is NULL, it is simply
2244 set to the generated alias checks.
2245 LOOP_VINFO - field LOOP_VINFO_COMP_ALIAS_DDRS contains the list of ddrs
2246 to be checked.
2248 Output:
2249 COND_EXPR - conditional expression.
2251 The returned COND_EXPR is the conditional expression to be used in the if
2252 statement that controls which version of the loop gets executed at runtime. */
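/* Schematically, for two DDR pairs (A,B) and (C,D) the resulting COND_EXPR
   is roughly
     (A_max <= B_min || B_max <= A_min)
     && (C_max <= D_min || D_max <= C_min)
   where each disjunct is built by create_intersect_range_checks above.  */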
2255 void
2256 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2258 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2259 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2260 tree part_cond_expr;
2262 if (comp_alias_ddrs.is_empty ())
2263 return;
2265 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2266 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2268 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2269 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2271 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_NOTE, vect_location,
2274 "create runtime check for data references ");
2275 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2276 dump_printf (MSG_NOTE, " and ");
2277 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2278 dump_printf (MSG_NOTE, "\n");
2281 /* Create the condition expression for each pair of data references. */
2282 create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
2283 if (*cond_expr)
2284 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2285 *cond_expr, part_cond_expr);
2286 else
2287 *cond_expr = part_cond_expr;
2290 if (dump_enabled_p ())
2291 dump_printf_loc (MSG_NOTE, vect_location,
2292 "created %u versioning for alias checks.\n",
2293 comp_alias_ddrs.length ());
2297 /* Function vect_loop_versioning.
2299 If the loop has data references that may or may not be aligned or/and
2300 has data reference relations whose independence was not proven then
2301 two versions of the loop need to be generated, one which is vectorized
2302 and one which isn't. A test is then generated to control which of the
2303 loops is executed. The test checks for the alignment of all of the
2304 data references that may or may not be aligned. An additional
2305 sequence of runtime tests is generated for each pair of DDRs whose
2306 independence was not proven. The vectorized version of the loop is
2307 executed only if both the alias and alignment tests pass.
2309 The test generated to check which version of the loop is executed
2310 is modified to also check for profitability as indicated by the
2311 cost model threshold TH.
2313 The versioning precondition(s) are placed in *COND_EXPR and
2314 *COND_EXPR_STMT_LIST. */
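/* Schematically, the generated guard is roughly
     if (scalar_loop_iters > th
         && niter_assumptions && alignment_ok && no_alias)
       vectorized loop;
     else
       scalar loop;
   where each conjunct is emitted only when the corresponding kind of
   versioning is required (see the checks below); the names alignment_ok
   and no_alias are illustrative placeholders for the conditions built by
   the helpers above.  */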
2316 void
2317 vect_loop_versioning (loop_vec_info loop_vinfo,
2318 unsigned int th, bool check_profitability)
2320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2321 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2322 basic_block condition_bb;
2323 gphi_iterator gsi;
2324 gimple_stmt_iterator cond_exp_gsi;
2325 basic_block merge_bb;
2326 basic_block new_exit_bb;
2327 edge new_exit_e, e;
2328 gphi *orig_phi, *new_phi;
2329 tree cond_expr = NULL_TREE;
2330 gimple_seq cond_expr_stmt_list = NULL;
2331 tree arg;
2332 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2333 gimple_seq gimplify_stmt_list = NULL;
2334 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2335 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2336 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2337 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2339 if (check_profitability)
2340 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2341 build_int_cst (TREE_TYPE (scalar_loop_iters),
2342 th));
2344 if (version_niter)
2345 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2347 if (cond_expr)
2348 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2349 is_gimple_condexpr, NULL_TREE);
2351 if (version_align)
2352 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2353 &cond_expr_stmt_list);
2355 if (version_alias)
2356 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2358 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2359 is_gimple_condexpr, NULL_TREE);
2360 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2362 initialize_original_copy_tables ();
2363 if (scalar_loop)
2365 edge scalar_e;
2366 basic_block preheader, scalar_preheader;
2368 /* We don't want to scale SCALAR_LOOP's frequencies; we need to
2369 scale LOOP's frequencies instead. */
2370 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
2371 prob, REG_BR_PROB_BASE - prob,
2372 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2373 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2374 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2375 while we need to move it above LOOP's preheader. */
2376 e = loop_preheader_edge (loop);
2377 scalar_e = loop_preheader_edge (scalar_loop);
2378 gcc_assert (empty_block_p (e->src)
2379 && single_pred_p (e->src));
2380 gcc_assert (empty_block_p (scalar_e->src)
2381 && single_pred_p (scalar_e->src));
2382 gcc_assert (single_pred_p (condition_bb));
2383 preheader = e->src;
2384 scalar_preheader = scalar_e->src;
2385 scalar_e = find_edge (condition_bb, scalar_preheader);
2386 e = single_pred_edge (preheader);
2387 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2388 scalar_preheader);
2389 redirect_edge_and_branch_force (scalar_e, preheader);
2390 redirect_edge_and_branch_force (e, condition_bb);
2391 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2392 single_pred (condition_bb));
2393 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2394 single_pred (scalar_preheader));
2395 set_immediate_dominator (CDI_DOMINATORS, preheader,
2396 condition_bb);
2398 else
2399 nloop = loop_version (loop, cond_expr, &condition_bb,
2400 prob, REG_BR_PROB_BASE - prob,
2401 prob, REG_BR_PROB_BASE - prob, true);
2403 if (version_niter)
2405 /* The versioned loop could be infinite; we need to clear the existing
2406 niter information, which was copied from the original loop. */
2407 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2408 vect_free_loop_info_assumptions (nloop);
2409 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2410 loop_constraint_set (loop, LOOP_C_INFINITE);
2413 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2414 && dump_enabled_p ())
2416 if (version_alias)
2417 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2418 "loop versioned for vectorization because of "
2419 "possible aliasing\n");
2420 if (version_align)
2421 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2422 "loop versioned for vectorization to enhance "
2423 "alignment\n");
2426 free_original_copy_tables ();
2428 /* Loop versioning violates an assumption we try to maintain during
2429 vectorization - that the loop exit block has a single predecessor.
2430 After versioning, the exit block of both loop versions is the same
2431 basic block (i.e. it has two predecessors). Just in order to simplify
2432 following transformations in the vectorizer, we fix this situation
2433 here by adding a new (empty) block on the exit-edge of the loop,
2434 with the proper loop-exit phis to maintain loop-closed-form.
2435 If loop versioning wasn't done from loop, but from scalar_loop instead,
2436 merge_bb will already have just a single predecessor. */
2438 merge_bb = single_exit (loop)->dest;
2439 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2441 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2442 new_exit_bb = split_edge (single_exit (loop));
2443 new_exit_e = single_exit (loop);
2444 e = EDGE_SUCC (new_exit_bb, 0);
2446 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2448 tree new_res;
2449 orig_phi = gsi.phi ();
2450 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2451 new_phi = create_phi_node (new_res, new_exit_bb);
2452 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2453 add_phi_arg (new_phi, arg, new_exit_e,
2454 gimple_phi_arg_location_from_edge (orig_phi, e));
2455 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2459 /* End loop-exit-fixes after versioning. */
2461 if (cond_expr_stmt_list)
2463 cond_exp_gsi = gsi_last_bb (condition_bb);
2464 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2465 GSI_SAME_STMT);
2467 update_ssa (TODO_update_ssa);