gcc/tree-vect-loop-manip.c

   1 /* Vectorizer Specific Loop Manipulations
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4    and Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "tree.h"
  27 #include "gimple.h"
  28 #include "cfghooks.h"
  29 #include "tree-pass.h"
  30 #include "ssa.h"
  31 #include "fold-const.h"
  32 #include "cfganal.h"
  33 #include "gimplify.h"
  34 #include "gimple-iterator.h"
  35 #include "gimplify-me.h"
  36 #include "tree-cfg.h"
  37 #include "tree-ssa-loop-manip.h"
  38 #include "tree-into-ssa.h"
  39 #include "tree-ssa.h"
  40 #include "cfgloop.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "tree-vectorizer.h"
  43 #include "tree-ssa-loop-ivopts.h"
  44
  45 /*************************************************************************
  46   Simple Loop Peeling Utilities
  47
  48   Utilities to support loop peeling for vectorization purposes.
  49  *************************************************************************/
  50
  51
  52 /* Renames the use *OP_P.  */
  53
  54 static void
  55 rename_use_op (use_operand_p op_p)
  56 {
  57   tree new_name;
  58
  59   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
  60     return;
  61
  62   new_name = get_current_def (USE_FROM_PTR (op_p));
  63
  64   /* Something defined outside of the loop.  */
  65   if (!new_name)
  66     return;
  67
  68   /* An ordinary ssa name defined in the loop.  */
  69
  70   SET_USE (op_p, new_name);
  71 }
  72
  73
  74 /* Renames the variables in basic block BB.  Allow renaming  of PHI arguments
  75    on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
  76    true.  */
  77
  78 static void
  79 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
  80 {
  81   gimple *stmt;
  82   use_operand_p use_p;
  83   ssa_op_iter iter;
  84   edge e;
  85   edge_iterator ei;
  86   struct loop *loop = bb->loop_father;
  87   struct loop *outer_loop = NULL;
  88
  89   if (rename_from_outer_loop)
  90     {
  91       gcc_assert (loop);
  92       outer_loop = loop_outer (loop);
  93     }
  94
  95   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
  96        gsi_next (&gsi))
  97     {
  98       stmt = gsi_stmt (gsi);
  99       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
 100         rename_use_op (use_p);
 101     }
 102
 103   FOR_EACH_EDGE (e, ei, bb->preds)
 104     {
 105       if (!flow_bb_inside_loop_p (loop, e->src))
 106         {
 107           if (!rename_from_outer_loop)
 108             continue;
 109           if (e->src != outer_loop->header)
 110             {
 111               if (outer_loop->inner->next)
 112                 {
 113                   /* If outer_loop has 2 inner loops, allow there to
 114                      be an extra basic block which decides which of the
 115                      two loops to use using LOOP_VECTORIZED.  */
 116                   if (!single_pred_p (e->src)
 117                       || single_pred (e->src) != outer_loop->header)
 118                     continue;
 119                 }
 120               else
 121                 continue;
 122             }
 123         }
 124       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 125            gsi_next (&gsi))
 126         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
 127     }
 128 }
 129
 130
 131 struct adjust_info
 132 {
 133   tree from, to;
 134   basic_block bb;
 135 };
 136
 137 /* A stack of values to be adjusted in debug stmts.  We have to
 138    process them LIFO, so that the closest substitution applies.  If we
 139    processed them FIFO, without the stack, we might substitute uses
 140    with a PHI DEF that would soon become non-dominant, and when we got
 141    to the suitable one, it wouldn't have anything to substitute any
 142    more.  */
 143 static vec<adjust_info, va_heap> adjust_vec;
 144
 145 /* Adjust any debug stmts that referenced AI->from values to use the
 146    loop-closed AI->to, if the references are dominated by AI->bb and
 147    not by the definition of AI->from.  */
 148
 149 static void
 150 adjust_debug_stmts_now (adjust_info *ai)
 151 {
 152   basic_block bbphi = ai->bb;
 153   tree orig_def = ai->from;
 154   tree new_def = ai->to;
 155   imm_use_iterator imm_iter;
 156   gimple *stmt;
 157   basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
 158
 159   gcc_assert (dom_info_available_p (CDI_DOMINATORS));
 160
 161   /* Adjust any debug stmts that held onto non-loop-closed
 162      references.  */
 163   FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
 164     {
 165       use_operand_p use_p;
 166       basic_block bbuse;
 167
 168       if (!is_gimple_debug (stmt))
 169         continue;
 170
 171       gcc_assert (gimple_debug_bind_p (stmt));
 172
 173       bbuse = gimple_bb (stmt);
 174
 175       if ((bbuse == bbphi
 176            || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
 177           && !(bbuse == bbdef
 178                || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
 179         {
 180           if (new_def)
 181             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 182               SET_USE (use_p, new_def);
 183           else
 184             {
 185               gimple_debug_bind_reset_value (stmt);
 186               update_stmt (stmt);
 187             }
 188         }
 189     }
 190 }
 191
 192 /* Adjust debug stmts as scheduled before.  */
 193
 194 static void
 195 adjust_vec_debug_stmts (void)
 196 {
 197   if (!MAY_HAVE_DEBUG_STMTS)
 198     return;
 199
 200   gcc_assert (adjust_vec.exists ());
 201
 202   while (!adjust_vec.is_empty ())
 203     {
 204       adjust_debug_stmts_now (&adjust_vec.last ());
 205       adjust_vec.pop ();
 206     }
 207 }
 208
 209 /* Adjust any debug stmts that referenced FROM values to use the
 210    loop-closed TO, if the references are dominated by BB and not by
 211    the definition of FROM.  If adjust_vec is non-NULL, adjustments
 212    will be postponed until adjust_vec_debug_stmts is called.  */
 213
 214 static void
 215 adjust_debug_stmts (tree from, tree to, basic_block bb)
 216 {
 217   adjust_info ai;
 218
 219   if (MAY_HAVE_DEBUG_STMTS
 220       && TREE_CODE (from) == SSA_NAME
 221       && ! SSA_NAME_IS_DEFAULT_DEF (from)
 222       && ! virtual_operand_p (from))
 223     {
 224       ai.from = from;
 225       ai.to = to;
 226       ai.bb = bb;
 227
 228       if (adjust_vec.exists ())
 229         adjust_vec.safe_push (ai);
 230       else
 231         adjust_debug_stmts_now (&ai);
 232     }
 233 }
 234
 235 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
 236    to adjust any debug stmts that referenced the old phi arg,
 237    presumably non-loop-closed references left over from other
 238    transformations.  */
 239
 240 static void
 241 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
 242 {
 243   tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
 244
 245   SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
 246
 247   if (MAY_HAVE_DEBUG_STMTS)
 248     adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
 249                         gimple_bb (update_phi));
 250 }
 251
 252 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 253    that starts at zero, increases by one and its limit is NITERS.
 254
 255    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 256
 257 void
 258 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 259 {
 260   tree indx_before_incr, indx_after_incr;
 261   gcond *cond_stmt;
 262   gcond *orig_cond;
 263   edge exit_edge = single_exit (loop);
 264   gimple_stmt_iterator loop_cond_gsi;
 265   gimple_stmt_iterator incr_gsi;
 266   bool insert_after;
 267   tree init = build_int_cst (TREE_TYPE (niters), 0);
 268   tree step = build_int_cst (TREE_TYPE (niters), 1);
 269   source_location loop_loc;
 270   enum tree_code code;
 271
 272   orig_cond = get_loop_exit_condition (loop);
 273   gcc_assert (orig_cond);
 274   loop_cond_gsi = gsi_for_stmt (orig_cond);
 275
 276   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 277   create_iv (init, step, NULL_TREE, loop,
 278              &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
 279
 280   indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
 281                                               true, NULL_TREE, true,
 282                                               GSI_SAME_STMT);
 283   niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
 284                                      true, GSI_SAME_STMT);
 285
 286   code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
 287   cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
 288                                  NULL_TREE);
 289
 290   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 291
 292   /* Remove old loop exit test:  */
 293   gsi_remove (&loop_cond_gsi, true);
 294   free_stmt_vec_info (orig_cond);
 295
 296   loop_loc = find_loop_location (loop);
 297   if (dump_enabled_p ())
 298     {
 299       if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
 300         dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
 301                      LOCATION_LINE (loop_loc));
 302       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
 303     }
 304
 305   /* Record the number of latch iterations.  */
 306   loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
 307                                      build_int_cst (TREE_TYPE (niters), 1));
 308 }
 309
 310 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 311    For all PHI arguments in FROM->dest and TO->dest from those
 312    edges ensure that TO->dest PHI arguments have current_def
 313    to that in from.  */
 314
 315 static void
 316 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
 317 {
 318   gimple_stmt_iterator gsi_from, gsi_to;
 319
 320   for (gsi_from = gsi_start_phis (from->dest),
 321        gsi_to = gsi_start_phis (to->dest);
 322        !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
 323     {
 324       gimple *from_phi = gsi_stmt (gsi_from);
 325       gimple *to_phi = gsi_stmt (gsi_to);
 326       tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
 327       tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
 328       if (virtual_operand_p (from_arg))
 329         {
 330           gsi_next (&gsi_from);
 331           continue;
 332         }
 333       if (virtual_operand_p (to_arg))
 334         {
 335           gsi_next (&gsi_to);
 336           continue;
 337         }
 338       if (TREE_CODE (from_arg) != SSA_NAME)
 339         gcc_assert (operand_equal_p (from_arg, to_arg, 0));
 340       else
 341         {
 342           if (get_current_def (to_arg) == NULL_TREE)
 343             set_current_def (to_arg, get_current_def (from_arg));
 344         }
 345       gsi_next (&gsi_from);
 346       gsi_next (&gsi_to);
 347     }
 348
 349   gphi *from_phi = get_virtual_phi (from->dest);
 350   gphi *to_phi = get_virtual_phi (to->dest);
 351   if (from_phi)
 352     set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
 353                      get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
 354 }
 355
 356
 357 /* Given LOOP this function generates a new copy of it and puts it
 358    on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
 359    non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
 360    basic blocks from SCALAR_LOOP instead of LOOP, but to either the
 361    entry or exit of LOOP.  */
 362
 363 struct loop *
 364 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
 365                                         struct loop *scalar_loop, edge e)
 366 {
 367   struct loop *new_loop;
 368   basic_block *new_bbs, *bbs, *pbbs;
 369   bool at_exit;
 370   bool was_imm_dom;
 371   basic_block exit_dest;
 372   edge exit, new_exit;
 373   bool duplicate_outer_loop = false;
 374
 375   exit = single_exit (loop);
 376   at_exit = (e == exit);
 377   if (!at_exit && e != loop_preheader_edge (loop))
 378     return NULL;
 379
 380   if (scalar_loop == NULL)
 381     scalar_loop = loop;
 382
 383   bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 384   pbbs = bbs + 1;
 385   get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
 386   /* Allow duplication of outer loops.  */
 387   if (scalar_loop->inner)
 388     duplicate_outer_loop = true;
 389   /* Check whether duplication is possible.  */
 390   if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
 391     {
 392       free (bbs);
 393       return NULL;
 394     }
 395
 396   /* Generate new loop structure.  */
 397   new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
 398   duplicate_subloops (scalar_loop, new_loop);
 399
 400   exit_dest = exit->dest;
 401   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 402                                           exit_dest) == loop->header ?
 403                  true : false);
 404
 405   /* Also copy the pre-header, this avoids jumping through hoops to
 406      duplicate the loop entry PHI arguments.  Create an empty
 407      pre-header unconditionally for this.  */
 408   basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
 409   edge entry_e = single_pred_edge (preheader);
 410   bbs[0] = preheader;
 411   new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
 412
 413   exit = single_exit (scalar_loop);
 414   copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
 415             &exit, 1, &new_exit, NULL,
 416             at_exit ? loop->latch : e->src, true);
 417   exit = single_exit (loop);
 418   basic_block new_preheader = new_bbs[0];
 419
 420   add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
 421
 422   if (scalar_loop != loop)
 423     {
 424       /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
 425          SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
 426          but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
 427          the LOOP SSA_NAMEs (on the exit edge and edge from latch to
 428          header) to have current_def set, so copy them over.  */
 429       slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
 430                                                 exit);
 431       slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
 432                                                            0),
 433                                                 EDGE_SUCC (loop->latch, 0));
 434     }
 435
 436   if (at_exit) /* Add the loop copy at exit.  */
 437     {
 438       if (scalar_loop != loop)
 439         {
 440           gphi_iterator gsi;
 441           new_exit = redirect_edge_and_branch (new_exit, exit_dest);
 442
 443           for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
 444                gsi_next (&gsi))
 445             {
 446               gphi *phi = gsi.phi ();
 447               tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
 448               location_t orig_locus
 449                 = gimple_phi_arg_location_from_edge (phi, e);
 450
 451               add_phi_arg (phi, orig_arg, new_exit, orig_locus);
 452             }
 453         }
 454       redirect_edge_and_branch_force (e, new_preheader);
 455       flush_pending_stmts (e);
 456       set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
 457       if (was_imm_dom || duplicate_outer_loop)
 458         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
 459
 460       /* And remove the non-necessary forwarder again.  Keep the other
 461          one so we have a proper pre-header for the loop at the exit edge.  */
 462       redirect_edge_pred (single_succ_edge (preheader),
 463                           single_pred (preheader));
 464       delete_basic_block (preheader);
 465       set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 466                                loop_preheader_edge (scalar_loop)->src);
 467     }
 468   else /* Add the copy at entry.  */
 469     {
 470       if (scalar_loop != loop)
 471         {
 472           /* Remove the non-necessary forwarder of scalar_loop again.  */
 473           redirect_edge_pred (single_succ_edge (preheader),
 474                               single_pred (preheader));
 475           delete_basic_block (preheader);
 476           set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
 477                                    loop_preheader_edge (scalar_loop)->src);
 478           preheader = split_edge (loop_preheader_edge (loop));
 479           entry_e = single_pred_edge (preheader);
 480         }
 481
 482       redirect_edge_and_branch_force (entry_e, new_preheader);
 483       flush_pending_stmts (entry_e);
 484       set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
 485
 486       redirect_edge_and_branch_force (new_exit, preheader);
 487       flush_pending_stmts (new_exit);
 488       set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
 489
 490       /* And remove the non-necessary forwarder again.  Keep the other
 491          one so we have a proper pre-header for the loop at the exit edge.  */
 492       redirect_edge_pred (single_succ_edge (new_preheader),
 493                           single_pred (new_preheader));
 494       delete_basic_block (new_preheader);
 495       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 496                                loop_preheader_edge (new_loop)->src);
 497     }
 498
 499   for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
 500     rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
 501
 502   if (scalar_loop != loop)
 503     {
 504       /* Update new_loop->header PHIs, so that on the preheader
 505          edge they are the ones from loop rather than scalar_loop.  */
 506       gphi_iterator gsi_orig, gsi_new;
 507       edge orig_e = loop_preheader_edge (loop);
 508       edge new_e = loop_preheader_edge (new_loop);
 509
 510       for (gsi_orig = gsi_start_phis (loop->header),
 511            gsi_new = gsi_start_phis (new_loop->header);
 512            !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
 513            gsi_next (&gsi_orig), gsi_next (&gsi_new))
 514         {
 515           gphi *orig_phi = gsi_orig.phi ();
 516           gphi *new_phi = gsi_new.phi ();
 517           tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
 518           location_t orig_locus
 519             = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
 520
 521           add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
 522         }
 523     }
 524
 525   free (new_bbs);
 526   free (bbs);
 527
 528   checking_verify_dominators (CDI_DOMINATORS);
 529
 530   return new_loop;
 531 }
 532
 533
 534 /* Given the condition expression COND, put it as the last statement of
 535    GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
 536    DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
 537    skip the loop.  PROBABILITY is the skip edge's probability.  */
 538
 539 static edge
 540 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
 541                        basic_block guard_to, basic_block dom_bb,
 542                        int probability)
 543 {
 544   gimple_stmt_iterator gsi;
 545   edge new_e, enter_e;
 546   gcond *cond_stmt;
 547   gimple_seq gimplify_stmt_list = NULL;
 548
 549   enter_e = EDGE_SUCC (guard_bb, 0);
 550   enter_e->flags &= ~EDGE_FALLTHRU;
 551   enter_e->flags |= EDGE_FALSE_VALUE;
 552   gsi = gsi_last_bb (guard_bb);
 553
 554   cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
 555                                  NULL_TREE);
 556   if (gimplify_stmt_list)
 557     gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
 558
 559   cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
 560   gsi = gsi_last_bb (guard_bb);
 561   gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
 562
 563   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 564   new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
 565
 566   new_e->count = guard_bb->count;
 567   new_e->probability = probability;
 568   new_e->count = apply_probability (enter_e->count, probability);
 569   enter_e->count -= new_e->count;
 570   enter_e->probability = inverse_probability (probability);
 571   set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
 572   return new_e;
 573 }
 574
 575
 576 /* This function verifies that the following restrictions apply to LOOP:
 577    (1) it consists of exactly 2 basic blocks - header, and an empty latch
 578        for innermost loop and 5 basic blocks for outer-loop.
 579    (2) it is single entry, single exit
 580    (3) its exit condition is the last stmt in the header
 581    (4) E is the entry/exit edge of LOOP.
 582  */
 583
 584 bool
 585 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
 586 {
 587   edge exit_e = single_exit (loop);
 588   edge entry_e = loop_preheader_edge (loop);
 589   gcond *orig_cond = get_loop_exit_condition (loop);
 590   gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
 591   unsigned int num_bb = loop->inner? 5 : 2;
 592
 593   /* All loops have an outer scope; the only case loop->outer is NULL is for
 594      the function itself.  */
 595   if (!loop_outer (loop)
 596       || loop->num_nodes != num_bb
 597       || !empty_block_p (loop->latch)
 598       || !single_exit (loop)
 599       /* Verify that new loop exit condition can be trivially modified.  */
 600       || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
 601       || (e != exit_e && e != entry_e))
 602     return false;
 603
 604   return true;
 605 }
 606
 607 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
 608    in the exit bb and rename all the uses after the loop.  This simplifies
 609    the *guard[12] routines, which assume loop closed SSA form for all PHIs
 610    (but normally loop closed SSA form doesn't require virtual PHIs to be
 611    in the same form).  Doing this early simplifies the checking what
 612    uses should be renamed.  */
 613
 614 static void
 615 create_lcssa_for_virtual_phi (struct loop *loop)
 616 {
 617   gphi_iterator gsi;
 618   edge exit_e = single_exit (loop);
 619
 620   for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
 621     if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 622       {
 623         gphi *phi = gsi.phi ();
 624         for (gsi = gsi_start_phis (exit_e->dest);
 625              !gsi_end_p (gsi); gsi_next (&gsi))
 626           if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
 627             break;
 628         if (gsi_end_p (gsi))
 629           {
 630             tree new_vop = copy_ssa_name (PHI_RESULT (phi));
 631             gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
 632             tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
 633             imm_use_iterator imm_iter;
 634             gimple *stmt;
 635             use_operand_p use_p;
 636
 637             add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
 638             gimple_phi_set_result (new_phi, new_vop);
 639             FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
 640               if (stmt != new_phi
 641                   && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
 642                 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
 643                   SET_USE (use_p, new_vop);
 644           }
 645         break;
 646       }
 647
 648 }
 649
 650 /* Function vect_get_loop_location.
 651
 652    Extract the location of the loop in the source code.
 653    If the loop is not well formed for vectorization, an estimated
 654    location is calculated.
 655    Return the loop location if succeed and NULL if not.  */
 656
 657 source_location
 658 find_loop_location (struct loop *loop)
 659 {
 660   gimple *stmt = NULL;
 661   basic_block bb;
 662   gimple_stmt_iterator si;
 663
 664   if (!loop)
 665     return UNKNOWN_LOCATION;
 666
 667   stmt = get_loop_exit_condition (loop);
 668
 669   if (stmt
 670       && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 671     return gimple_location (stmt);
 672
 673   /* If we got here the loop is probably not "well formed",
 674      try to estimate the loop location */
 675
 676   if (!loop->header)
 677     return UNKNOWN_LOCATION;
 678
 679   bb = loop->header;
 680
 681   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 682     {
 683       stmt = gsi_stmt (si);
 684       if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
 685         return gimple_location (stmt);
 686     }
 687
 688   return UNKNOWN_LOCATION;
 689 }
 690
 691 /* Return true if PHI defines an IV of the loop to be vectorized.  */
 692
 693 static bool
 694 iv_phi_p (gphi *phi)
 695 {
 696   if (virtual_operand_p (PHI_RESULT (phi)))
 697     return false;
 698
 699   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 700   gcc_assert (stmt_info != NULL);
 701   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
 702       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
 703     return false;
 704
 705   return true;
 706 }
 707
 708 /* Function vect_can_advance_ivs_p
 709
 710    In case the number of iterations that LOOP iterates is unknown at compile
 711    time, an epilog loop will be generated, and the loop induction variables
 712    (IVs) will be "advanced" to the value they are supposed to take just before
 713    the epilog loop.  Here we check that the access function of the loop IVs
 714    and the expression that represents the loop bound are simple enough.
 715    These restrictions will be relaxed in the future.  */
 716
 717 bool
 718 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 719 {
 720   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 721   basic_block bb = loop->header;
 722   gphi_iterator gsi;
 723
 724   /* Analyze phi functions of the loop header.  */
 725
 726   if (dump_enabled_p ())
 727     dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
 728   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 729     {
 730       tree evolution_part;
 731
 732       gphi *phi = gsi.phi ();
 733       if (dump_enabled_p ())
 734         {
 735           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 736           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 737         }
 738
 739       /* Skip virtual phi's. The data dependences that are associated with
 740          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
 741
 742          Skip reduction phis.  */
 743       if (!iv_phi_p (phi))
 744         {
 745           if (dump_enabled_p ())
 746             dump_printf_loc (MSG_NOTE, vect_location,
 747                              "reduc or virtual phi. skip.\n");
 748           continue;
 749         }
 750
 751       /* Analyze the evolution function.  */
 752
 753       evolution_part
 754         = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 755       if (evolution_part == NULL_TREE)
 756         {
 757           if (dump_enabled_p ())
 758             dump_printf (MSG_MISSED_OPTIMIZATION,
 759                          "No access function or evolution.\n");
 760           return false;
 761         }
 762
 763       /* FORNOW: We do not transform initial conditions of IVs
 764          which evolution functions are not invariants in the loop.  */
 765
 766       if (!expr_invariant_in_loop_p (loop, evolution_part))
 767         {
 768           if (dump_enabled_p ())
 769             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 770                              "evolution not invariant in loop.\n");
 771           return false;
 772         }
 773
 774       /* FORNOW: We do not transform initial conditions of IVs
 775          which evolution functions are a polynomial of degree >= 2.  */
 776
 777       if (tree_is_chrec (evolution_part))
 778         {
 779           if (dump_enabled_p ())
 780             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 781                              "evolution is chrec.\n");
 782           return false;
 783         }
 784     }
 785
 786   return true;
 787 }
 788
 789
 790 /*   Function vect_update_ivs_after_vectorizer.
 791
 792      "Advance" the induction variables of LOOP to the value they should take
 793      after the execution of LOOP.  This is currently necessary because the
 794      vectorizer does not handle induction variables that are used after the
 795      loop.  Such a situation occurs when the last iterations of LOOP are
 796      peeled, because:
 797      1. We introduced new uses after LOOP for IVs that were not originally used
 798         after LOOP: the IVs of LOOP are now used by an epilog loop.
 799      2. LOOP is going to be vectorized; this means that it will iterate N/VF
 800         times, whereas the loop IVs should be bumped N times.
 801
 802      Input:
 803      - LOOP - a loop that is going to be vectorized. The last few iterations
 804               of LOOP were peeled.
 805      - NITERS - the number of iterations that LOOP executes (before it is
 806                 vectorized). i.e, the number of times the ivs should be bumped.
 807      - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
 808                   coming out from LOOP on which there are uses of the LOOP ivs
 809                   (this is the path from LOOP->exit to epilog_loop->preheader).
 810
 811                   The new definitions of the ivs are placed in LOOP->exit.
 812                   The phi args associated with the edge UPDATE_E in the bb
 813                   UPDATE_E->dest are updated accordingly.
 814
 815      Assumption 1: Like the rest of the vectorizer, this function assumes
 816      a single loop exit that has a single predecessor.
 817
 818      Assumption 2: The phi nodes in the LOOP header and in update_bb are
 819      organized in the same order.
 820
 821      Assumption 3: The access function of the ivs is simple enough (see
 822      vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
 823
 824      Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
 825      coming out of LOOP on which the ivs of LOOP are used (this is the path
 826      that leads to the epilog loop; other paths skip the epilog loop).  This
 827      path starts with the edge UPDATE_E, and its destination (denoted update_bb)
 828      needs to have its phis updated.
 829  */
 830
 831 static void
 832 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
 833                                   tree niters, edge update_e)
 834 {
 835   gphi_iterator gsi, gsi1;
 836   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 837   basic_block update_bb = update_e->dest;
 838   basic_block exit_bb = single_exit (loop)->dest;
 839
 840   /* Make sure there exists a single-predecessor exit bb:  */
 841   gcc_assert (single_pred_p (exit_bb));
 842   gcc_assert (single_succ_edge (exit_bb) == update_e);
 843
 844   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
 845        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
 846        gsi_next (&gsi), gsi_next (&gsi1))
 847     {
 848       tree init_expr;
 849       tree step_expr, off;
 850       tree type;
 851       tree var, ni, ni_name;
 852       gimple_stmt_iterator last_gsi;
 853
 854       gphi *phi = gsi.phi ();
 855       gphi *phi1 = gsi1.phi ();
 856       if (dump_enabled_p ())
 857         {
 858           dump_printf_loc (MSG_NOTE, vect_location,
 859                            "vect_update_ivs_after_vectorizer: phi: ");
 860           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 861         }
 862
 863       /* Skip reduction and virtual phis.  */
 864       if (!iv_phi_p (phi))
 865         {
 866           if (dump_enabled_p ())
 867             dump_printf_loc (MSG_NOTE, vect_location,
 868                              "reduc or virtual phi. skip.\n");
 869           continue;
 870         }
 871
 872       type = TREE_TYPE (gimple_phi_result (phi));
 873       step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
 874       step_expr = unshare_expr (step_expr);
 875
 876       /* FORNOW: We do not support IVs whose evolution function is a polynomial
 877          of degree >= 2 or exponential.  */
 878       gcc_assert (!tree_is_chrec (step_expr));
 879
 880       init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 881
 882       off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
 883                          fold_convert (TREE_TYPE (step_expr), niters),
 884                          step_expr);
 885       if (POINTER_TYPE_P (type))
 886         ni = fold_build_pointer_plus (init_expr, off);
 887       else
 888         ni = fold_build2 (PLUS_EXPR, type,
 889                           init_expr, fold_convert (type, off));
 890
 891       var = create_tmp_var (type, "tmp");
 892
 893       last_gsi = gsi_last_bb (exit_bb);
 894       gimple_seq new_stmts = NULL;
 895       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
 896       /* Exit_bb shouldn't be empty.  */
 897       if (!gsi_end_p (last_gsi))
 898         gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
 899       else
 900         gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
 901
 902       /* Fix phi expressions in the successor bb.  */
 903       adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
 904     }
 905 }
 906
 907 /* Function vect_gen_prolog_loop_niters
 908
 909    Generate the number of iterations which should be peeled as prolog for the
 910    loop represented by LOOP_VINFO.  It is calculated as the misalignment of
 911    DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
 912    As a result, after the execution of this loop, the data reference DR will
 913    refer to an aligned location.  The following computation is generated:
 914
 915    If the misalignment of DR is known at compile time:
 916      addr_mis = int mis = DR_MISALIGNMENT (dr);
 917    Else, compute address misalignment in bytes:
 918      addr_mis = addr & (vectype_align - 1)
 919
 920    prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
 921
 922    (elem_size = element type size; an element is the scalar element whose type
 923    is the inner type of the vectype)
 924
 925    The computations will be emitted at the end of BB.  We also compute and
 926    store upper bound (included) of the result in BOUND.
 927
 928    When the step of the data-ref in the loop is not 1 (as in interleaved data
 929    and SLP), the number of iterations of the prolog must be divided by the step
 930    (which is equal to the size of interleaved group).
 931
 932    The above formulas assume that VF == number of elements in the vector. This
 933    may not hold when there are multiple-types in the loop.
 934    In this case, for some data-references in the loop the VF does not represent
 935    the number of elements that fit in the vector.  Therefore, instead of VF we
 936    use TYPE_VECTOR_SUBPARTS.  */
 937
 938 static tree
 939 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
 940                              basic_block bb, int *bound)
 941 {
 942   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
 943   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 944   tree var;
 945   tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
 946   gimple_seq stmts = NULL, new_stmts = NULL;
 947   tree iters, iters_name;
 948   gimple *dr_stmt = DR_STMT (dr);
 949   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
 950   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 951   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
 952   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
 953
 954   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
 955     {
 956       int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 957
 958       if (dump_enabled_p ())
 959         dump_printf_loc (MSG_NOTE, vect_location,
 960                          "known peeling = %d.\n", npeel);
 961
 962       iters = build_int_cst (niters_type, npeel);
 963       *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 964     }
 965   else
 966     {
 967       bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
 968       tree offset = negative
 969           ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
 970       tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
 971                                                 &stmts, offset, loop);
 972       tree type = unsigned_type_for (TREE_TYPE (start_addr));
 973       tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
 974       HOST_WIDE_INT elem_size =
 975                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
 976       tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
 977       tree nelements_minus_1 = build_int_cst (type, nelements - 1);
 978       tree nelements_tree = build_int_cst (type, nelements);
 979       tree byte_misalign;
 980       tree elem_misalign;
 981
 982       /* Create:  byte_misalign = addr & (vectype_align - 1)  */
 983       byte_misalign =
 984         fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
 985                      vectype_align_minus_1);
 986
 987       /* Create:  elem_misalign = byte_misalign / element_size  */
 988       elem_misalign =
 989         fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
 990
 991       /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
 992       if (negative)
 993         iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
 994       else
 995         iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
 996       iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
 997       iters = fold_convert (niters_type, iters);
 998       *bound = nelements - 1;
 999     }
1000
1001   if (dump_enabled_p ())
1002     {
1003       dump_printf_loc (MSG_NOTE, vect_location,
1004                        "niters for prolog loop: ");
1005       dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
1006       dump_printf (MSG_NOTE, "\n");
1007     }
1008
1009   var = create_tmp_var (niters_type, "prolog_loop_niters");
1010   iters_name = force_gimple_operand (iters, &new_stmts, false, var);
1011
1012   if (new_stmts)
1013     gimple_seq_add_seq (&stmts, new_stmts);
1014   if (stmts)
1015     {
1016       gcc_assert (single_succ_p (bb));
1017       gimple_stmt_iterator gsi = gsi_last_bb (bb);
1018       if (gsi_end_p (gsi))
1019         gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1020       else
1021         gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
1022     }
1023   return iters_name;
1024 }
1025
1026
1027 /* Function vect_update_init_of_dr
1028
1029    NITERS iterations were peeled from LOOP.  DR represents a data reference
1030    in LOOP.  This function updates the information recorded in DR to
1031    account for the fact that the first NITERS iterations had already been
1032    executed.  Specifically, it updates the OFFSET field of DR.  */
1033
1034 static void
1035 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1036 {
1037   tree offset = DR_OFFSET (dr);
1038
1039   niters = fold_build2 (MULT_EXPR, sizetype,
1040                         fold_convert (sizetype, niters),
1041                         fold_convert (sizetype, DR_STEP (dr)));
1042   offset = fold_build2 (PLUS_EXPR, sizetype,
1043                         fold_convert (sizetype, offset), niters);
1044   DR_OFFSET (dr) = offset;
1045 }
1046
1047
1048 /* Function vect_update_inits_of_drs
1049
1050    NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1051    This function updates the information recorded for the data references in
1052    the loop to account for the fact that the first NITERS iterations had
1053    already been executed.  Specifically, it updates the initial_condition of
1054    the access_function of all the data_references in the loop.  */
1055
1056 static void
1057 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1058 {
1059   unsigned int i;
1060   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061   struct data_reference *dr;
1062
1063   if (dump_enabled_p ())
1064     dump_printf_loc (MSG_NOTE, vect_location,
1065                      "=== vect_update_inits_of_dr ===\n");
1066
1067   /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
1068   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1069     {
1070       gimple_seq seq;
1071       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1072       tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1073
1074       niters = fold_convert (sizetype, niters);
1075       niters = force_gimple_operand (niters, &seq, false, var);
1076       if (seq)
1077         {
1078           basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1079           gcc_assert (!new_bb);
1080         }
1081     }
1082
1083   FOR_EACH_VEC_ELT (datarefs, i, dr)
1084     vect_update_init_of_dr (dr, niters);
1085 }
1086
1087
1088 /* This function builds ni_name = number of iterations.  Statements
1089    are emitted on the loop preheader edge.  */
1090
1091 tree
1092 vect_build_loop_niters (loop_vec_info loop_vinfo)
1093 {
1094   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1095   if (TREE_CODE (ni) == INTEGER_CST)
1096     return ni;
1097   else
1098     {
1099       tree ni_name, var;
1100       gimple_seq stmts = NULL;
1101       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1102
1103       var = create_tmp_var (TREE_TYPE (ni), "niters");
1104       ni_name = force_gimple_operand (ni, &stmts, false, var);
1105       if (stmts)
1106         gsi_insert_seq_on_edge_immediate (pe, stmts);
1107
1108       return ni_name;
1109     }
1110 }
1111
1112 /* Calculate the number of iterations above which vectorized loop will be
1113    preferred than scalar loop.  NITERS_PROLOG is the number of iterations
1114    of prolog loop.  If it's integer const, the integer number is also passed
1115    in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper bound (included) of
1116    number of iterations of prolog loop.  VFM1 is vector factor minus one.
1117    If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1118    (rather than vectorized) loop will be executed.  This function stores
1119    upper bound (included) of the result in BOUND_SCALAR.  */
1120
1121 static tree
1122 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
1123                              int bound_prolog, int vfm1, int th,
1124                              int *bound_scalar, bool check_profitability)
1125 {
1126   tree type = TREE_TYPE (niters_prolog);
1127   tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
1128                              build_int_cst (type, vfm1));
1129
1130   *bound_scalar = vfm1 + bound_prolog;
1131   if (check_profitability)
1132     {
1133       /* TH indicates the minimum niters of vectorized loop, while we
1134          compute the maximum niters of scalar loop.  */
1135       th--;
1136       /* Peeling for constant times.  */
1137       if (int_niters_prolog >= 0)
1138         {
1139           *bound_scalar = (int_niters_prolog + vfm1 < th
1140                             ? th
1141                             : vfm1 + int_niters_prolog);
1142           return build_int_cst (type, *bound_scalar);
1143         }
1144       /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
1145          bound (inlcuded) of niters of prolog loop.  */
1146       if (th >=  vfm1 + bound_prolog)
1147         {
1148           *bound_scalar = th;
1149           return build_int_cst (type, th);
1150         }
1151       /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
1152       else if (th > vfm1)
1153         return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1154     }
1155   return niters;
1156 }
1157
1158 /* This function generates the following statements:
1159
1160    niters = number of iterations loop executes (after peeling)
1161    niters_vector = niters / vf
1162
1163    and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
1164    true if NITERS doesn't overflow.  */
1165
1166 void
1167 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1168                              tree *niters_vector_ptr, bool niters_no_overflow)
1169 {
1170   tree ni_minus_gap, var;
1171   tree niters_vector;
1172   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1174   tree log_vf = build_int_cst (TREE_TYPE (niters), exact_log2 (vf));
1175
1176   /* If epilogue loop is required because of data accesses with gaps, we
1177      subtract one iteration from the total number of iterations here for
1178      correct calculation of RATIO.  */
1179   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1180     {
1181       ni_minus_gap = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1182                                   niters,
1183                                   build_one_cst (TREE_TYPE (niters)));
1184       if (!is_gimple_val (ni_minus_gap))
1185         {
1186           var = create_tmp_var (TREE_TYPE (niters), "ni_gap");
1187           gimple *stmts = NULL;
1188           ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1189                                                true, var);
1190           gsi_insert_seq_on_edge_immediate (pe, stmts);
1191         }
1192     }
1193   else
1194     ni_minus_gap = niters;
1195
1196   /* Create: niters >> log2(vf) */
1197   /* If it's known that niters == number of latch executions + 1 doesn't
1198      overflow, we can generate niters >> log2(vf); otherwise we generate
1199      (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1200      will be at least one.  */
1201   if (niters_no_overflow)
1202     niters_vector = fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1203                                  ni_minus_gap, log_vf);
1204   else
1205     niters_vector
1206       = fold_build2 (PLUS_EXPR, TREE_TYPE (niters),
1207                      fold_build2 (RSHIFT_EXPR, TREE_TYPE (niters),
1208                                   fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
1209                                                ni_minus_gap,
1210                                                build_int_cst
1211                                                  (TREE_TYPE (niters), vf)),
1212                                   log_vf),
1213                      build_int_cst (TREE_TYPE (niters), 1));
1214
1215   if (!is_gimple_val (niters_vector))
1216     {
1217       var = create_tmp_var (TREE_TYPE (niters), "bnd");
1218       gimple *stmts = NULL;
1219       niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1220       gsi_insert_seq_on_edge_immediate (pe, stmts);
1221     }
1222   *niters_vector_ptr = niters_vector;
1223
1224   return;
1225 }
1226
1227 /* Given NITERS_VECTOR which is the number of iterations for vectorized
1228    loop specified by LOOP_VINFO after vectorization, compute the number
1229    of iterations before vectorization (niters_vector * vf) and store it
1230    to NITERS_VECTOR_MULT_VF_PTR.  */
1231
1232 static void
1233 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1234                                      tree niters_vector,
1235                                      tree *niters_vector_mult_vf_ptr)
1236 {
1237   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1238   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239   tree type = TREE_TYPE (niters_vector);
1240   tree log_vf = build_int_cst (type, exact_log2 (vf));
1241   basic_block exit_bb = single_exit (loop)->dest;
1242
1243   gcc_assert (niters_vector_mult_vf_ptr != NULL);
1244   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1245                                             niters_vector, log_vf);
1246   if (!is_gimple_val (niters_vector_mult_vf))
1247     {
1248       tree var = create_tmp_var (type, "niters_vector_mult_vf");
1249       gimple_seq stmts = NULL;
1250       niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1251                                                     &stmts, true, var);
1252       gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1253       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1254     }
1255   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1256 }
1257
1258 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1259    from SECOND/FIRST and puts it at the original loop's preheader/exit
1260    edge, the two loops are arranged as below:
1261
1262        preheader_a:
1263      first_loop:
1264        header_a:
1265          i_1 = PHI<i_0, i_2>;
1266          ...
1267          i_2 = i_1 + 1;
1268          if (cond_a)
1269            goto latch_a;
1270          else
1271            goto between_bb;
1272        latch_a:
1273          goto header_a;
1274
1275        between_bb:
1276          ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,
1277
1278      second_loop:
1279        header_b:
1280          i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1281                                  or with i_2 if no LCSSA phi is created
1282                                  under condition of CREATE_LCSSA_FOR_IV_PHIS.
1283          ...
1284          i_4 = i_3 + 1;
1285          if (cond_b)
1286            goto latch_b;
1287          else
1288            goto exit_bb;
1289        latch_b:
1290          goto header_b;
1291
1292        exit_bb:
1293
1294    This function creates loop closed SSA for the first loop; update the
1295    second loop's PHI nodes by replacing argument on incoming edge with the
1296    result of newly created lcssa PHI nodes.  IF CREATE_LCSSA_FOR_IV_PHIS
1297    is false, Loop closed ssa phis will only be created for non-iv phis for
1298    the first loop.
1299
1300    This function assumes exit bb of the first loop is preheader bb of the
1301    second loop, i.e, between_bb in the example code.  With PHIs updated,
1302    the second loop will execute rest iterations of the first.  */
1303
1304 static void
1305 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1306                                    struct loop *first, struct loop *second,
1307                                    bool create_lcssa_for_iv_phis)
1308 {
1309   gphi_iterator gsi_update, gsi_orig;
1310   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1311
1312   edge first_latch_e = EDGE_SUCC (first->latch, 0);
1313   edge second_preheader_e = loop_preheader_edge (second);
1314   basic_block between_bb = single_exit (first)->dest;
1315
1316   gcc_assert (between_bb == second_preheader_e->src);
1317   gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1318   /* Either the first loop or the second is the loop to be vectorized.  */
1319   gcc_assert (loop == first || loop == second);
1320
1321   for (gsi_orig = gsi_start_phis (first->header),
1322        gsi_update = gsi_start_phis (second->header);
1323        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1324        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1325     {
1326       gphi *orig_phi = gsi_orig.phi ();
1327       gphi *update_phi = gsi_update.phi ();
1328
1329       tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1330       /* Generate lcssa PHI node for the first loop.  */
1331       gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1332       if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1333         {
1334           tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1335           gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1336           add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1337           arg = new_res;
1338         }
1339
1340       /* Update PHI node in the second loop by replacing arg on the loop's
1341          incoming edge.  */
1342       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1343     }
1344 }
1345
1346 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
1347    of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
1348    are two pred edges of the merge point before UPDATE_LOOP.  The two loops
1349    appear like below:
1350
1351        guard_bb:
1352          if (cond)
1353            goto merge_bb;
1354          else
1355            goto skip_loop;
1356
1357      skip_loop:
1358        header_a:
1359          i_1 = PHI<i_0, i_2>;
1360          ...
1361          i_2 = i_1 + 1;
1362          if (cond_a)
1363            goto latch_a;
1364          else
1365            goto exit_a;
1366        latch_a:
1367          goto header_a;
1368
1369        exit_a:
1370          i_5 = PHI<i_2>;
1371
1372        merge_bb:
1373          ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1374
1375      update_loop:
1376        header_b:
1377          i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
1378          ...
1379          i_4 = i_3 + 1;
1380          if (cond_b)
1381            goto latch_b;
1382          else
1383            goto exit_bb;
1384        latch_b:
1385          goto header_b;
1386
1387        exit_bb:
1388
1389    This function creates PHI nodes at merge_bb and replaces the use of i_5
1390    in the update_loop's PHI node with the result of new PHI result.  */
1391
1392 static void
1393 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1394                                     struct loop *update_loop,
1395                                     edge guard_edge, edge merge_edge)
1396 {
1397   source_location merge_loc, guard_loc;
1398   edge orig_e = loop_preheader_edge (skip_loop);
1399   edge update_e = loop_preheader_edge (update_loop);
1400   gphi_iterator gsi_orig, gsi_update;
1401
1402   for ((gsi_orig = gsi_start_phis (skip_loop->header),
1403         gsi_update = gsi_start_phis (update_loop->header));
1404        !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1405        gsi_next (&gsi_orig), gsi_next (&gsi_update))
1406     {
1407       gphi *orig_phi = gsi_orig.phi ();
1408       gphi *update_phi = gsi_update.phi ();
1409
1410       /* Generate new phi node at merge bb of the guard.  */
1411       tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1412       gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1413
1414       /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
1415          args in NEW_PHI for these edges.  */
1416       tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1417       tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1418       merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1419       guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1420       add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1421       add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1422
1423       /* Update phi in UPDATE_PHI.  */
1424       adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1425     }
1426 }
1427
1428 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1429    this function searches for the corresponding lcssa phi node in exit
1430    bb of LOOP.  If it is found, return the phi result; otherwise return
1431    NULL.  */
1432
1433 static tree
1434 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1435                 gphi *lcssa_phi)
1436 {
1437   gphi_iterator gsi;
1438   edge e = single_exit (loop);
1439
1440   gcc_assert (single_pred_p (e->dest));
1441   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1442     {
1443       gphi *phi = gsi.phi ();
1444       if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1445                            PHI_ARG_DEF (lcssa_phi, 0), 0))
1446         return PHI_RESULT (phi);
1447     }
1448   return NULL_TREE;
1449 }
1450
1451 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1452    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
1453    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
1454    and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1455    The CFG looks like:
1456
1457      loop:
1458        header_a:
1459          i_1 = PHI<i_0, i_2>;
1460          ...
1461          i_2 = i_1 + 1;
1462          if (cond_a)
1463            goto latch_a;
1464          else
1465            goto exit_a;
1466        latch_a:
1467          goto header_a;
1468
1469        exit_a:
1470
1471        guard_bb:
1472          if (cond)
1473            goto merge_bb;
1474          else
1475            goto epilog_loop;
1476
1477        ;; fall_through_bb
1478
1479      epilog_loop:
1480        header_b:
1481          i_3 = PHI<i_2, i_4>;
1482          ...
1483          i_4 = i_3 + 1;
1484          if (cond_b)
1485            goto latch_b;
1486          else
1487            goto merge_bb;
1488        latch_b:
1489          goto header_b;
1490
1491        merge_bb:
1492          ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1493
1494        exit_bb:
1495          i_x = PHI<i_4>;  ;Use of i_4 to be replaced with i_y in merge_bb.
1496
1497    For each name used out side EPILOG (i.e - for each name that has a lcssa
1498    phi in exit_bb) we create a new PHI in merge_bb.  The new PHI has two
1499    args corresponding to GUARD_EDGE and MERGE_EDGE.  Arg for MERGE_EDGE is
1500    the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1501    by LOOP and is found in the exit bb of LOOP.  Arg of the original PHI
1502    in exit_bb will also be updated.  */
1503
1504 static void
1505 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1506                                     edge guard_edge, edge merge_edge)
1507 {
1508   gphi_iterator gsi;
1509   basic_block merge_bb = guard_edge->dest;
1510
1511   gcc_assert (single_succ_p (merge_bb));
1512   edge e = single_succ_edge (merge_bb);
1513   basic_block exit_bb = e->dest;
1514   gcc_assert (single_pred_p (exit_bb));
1515   gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1516
1517   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1518     {
1519       gphi *update_phi = gsi.phi ();
1520       tree old_arg = PHI_ARG_DEF (update_phi, 0);
1521       /* This loop-closed-phi actually doesn't represent a use out of the
1522          loop - the phi arg is a constant.  */
1523       if (TREE_CODE (old_arg) != SSA_NAME)
1524         continue;
1525
1526       tree merge_arg = get_current_def (old_arg);
1527       if (!merge_arg)
1528         merge_arg = old_arg;
1529
1530       tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1531       /* If the var is live after loop but not a reduction, we simply
1532          use the old arg.  */
1533       if (!guard_arg)
1534         guard_arg = old_arg;
1535
1536       /* Create new phi node in MERGE_BB:  */
1537       tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1538       gphi *merge_phi = create_phi_node (new_res, merge_bb);
1539
1540       /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1541          the two PHI args in merge_phi for these edges.  */
1542       add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1543       add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1544
1545       /* Update the original phi in exit_bb.  */
1546       adjust_phi_and_debug_stmts (update_phi, e, new_res);
1547     }
1548 }
1549
1550 /* EPILOG loop is duplicated from the original loop for vectorizing,
1551    the arg of its loop closed ssa PHI needs to be updated.  */
1552
1553 static void
1554 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1555 {
1556   gphi_iterator gsi;
1557   basic_block exit_bb = single_exit (epilog)->dest;
1558
1559   gcc_assert (single_pred_p (exit_bb));
1560   edge e = EDGE_PRED (exit_bb, 0);
1561   for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1562     rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1563 }
1564
1565 /* Function vect_do_peeling.
1566
1567    Input:
1568    - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1569
1570        preheader:
1571      LOOP:
1572        header_bb:
1573          loop_body
1574          if (exit_loop_cond) goto exit_bb
1575          else                goto header_bb
1576        exit_bb:
1577
1578    - NITERS: The number of iterations of the loop.
1579    - NITERSM1: The number of iterations of the loop's latch.
1580    - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1581    - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1582                               CHECK_PROFITABILITY is true.
1583    Output:
1584    - NITERS_VECTOR: The number of iterations of loop after vectorization.
1585
1586    This function peels prolog and epilog from the loop, adds guards skipping
1587    PROLOG and EPILOG for various conditions.  As a result, the changed CFG
1588    would look like:
1589
1590        guard_bb_1:
1591          if (prefer_scalar_loop) goto merge_bb_1
1592          else                    goto guard_bb_2
1593
1594        guard_bb_2:
1595          if (skip_prolog) goto merge_bb_2
1596          else             goto prolog_preheader
1597
1598        prolog_preheader:
1599      PROLOG:
1600        prolog_header_bb:
1601          prolog_body
1602          if (exit_prolog_cond) goto prolog_exit_bb
1603          else                  goto prolog_header_bb
1604        prolog_exit_bb:
1605
1606        merge_bb_2:
1607
1608        vector_preheader:
1609      VECTOR LOOP:
1610        vector_header_bb:
1611          vector_body
1612          if (exit_vector_cond) goto vector_exit_bb
1613          else                  goto vector_header_bb
1614        vector_exit_bb:
1615
1616        guard_bb_3:
1617          if (skip_epilog) goto merge_bb_3
1618          else             goto epilog_preheader
1619
1620        merge_bb_1:
1621
1622        epilog_preheader:
1623      EPILOG:
1624        epilog_header_bb:
1625          epilog_body
1626          if (exit_epilog_cond) goto merge_bb_3
1627          else                  goto epilog_header_bb
1628
1629        merge_bb_3:
1630
1631    Note this function peels prolog and epilog only if it's necessary,
1632    as well as guards.
1633    Returns created epilogue or NULL.
1634
1635    TODO: Guard for prefer_scalar_loop should be emitted along with
1636    versioning conditions if loop versioning is needed.  */
1637
1638
1639 struct loop *
1640 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1641                  tree *niters_vector, int th, bool check_profitability,
1642                  bool niters_no_overflow)
1643 {
1644   edge e, guard_e;
1645   tree type = TREE_TYPE (niters), guard_cond;
1646   basic_block guard_bb, guard_to;
1647   int prob_prolog, prob_vector, prob_epilog;
1648   int bound_prolog = 0, bound_scalar = 0, bound = 0;
1649   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1650   int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1651   bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1652                          || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1653
1654   if (!prolog_peeling && !epilog_peeling)
1655     return NULL;
1656
1657   prob_vector = 9 * REG_BR_PROB_BASE / 10;
1658   if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1659     vf = 3;
1660   prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
1661   vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1662
1663   struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
1664   struct loop *first_loop = loop;
1665   create_lcssa_for_virtual_phi (loop);
1666   update_ssa (TODO_update_ssa_only_virtuals);
1667
1668   if (MAY_HAVE_DEBUG_STMTS)
1669     {
1670       gcc_assert (!adjust_vec.exists ());
1671       adjust_vec.create (32);
1672     }
1673   initialize_original_copy_tables ();
1674
1675   /* Prolog loop may be skipped.  */
1676   bool skip_prolog = (prolog_peeling != 0);
1677   /* Skip to epilog if scalar loop may be preferred.  It's only used when
1678      we peel for epilog loop.  */
1679   bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1680   /* Epilog loop must be executed if the number of iterations for epilog
1681      loop is known at compile time, otherwise we need to add a check at
1682      the end of vector loop and skip to the end of epilog loop.  */
1683   bool skip_epilog = (prolog_peeling < 0
1684                       || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1685   /* PEELING_FOR_GAPS is special because epilog loop must be executed.  */
1686   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1687     skip_epilog = false;
1688
1689   /* Record the anchor bb at which guard should be placed if scalar loop
1690      may be preferred.  */
1691   basic_block anchor = loop_preheader_edge (loop)->src;
1692   if (skip_vector)
1693     split_edge (loop_preheader_edge (loop));
1694
1695   tree niters_prolog = build_int_cst (type, 0);
1696   source_location loop_loc = find_loop_location (loop);
1697   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1698   if (prolog_peeling)
1699     {
1700       e = loop_preheader_edge (loop);
1701       if (!slpeel_can_duplicate_loop_p (loop, e))
1702         {
1703           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1704                            "loop can't be duplicated to preheader edge.\n");
1705           gcc_unreachable ();
1706         }
1707       /* Peel prolog and put it on preheader edge of loop.  */
1708       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1709       if (!prolog)
1710         {
1711           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1712                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1713           gcc_unreachable ();
1714         }
1715       slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1716       first_loop = prolog;
1717       reset_original_copy_tables ();
1718
1719       /* Generate and update the number of iterations for prolog loop.  */
1720       niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1721                                                    &bound_prolog);
1722       slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1723
1724       /* Skip the prolog loop.  */
1725       if (skip_prolog)
1726         {
1727           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1728                                     niters_prolog, build_int_cst (type, 0));
1729           guard_bb = loop_preheader_edge (prolog)->src;
1730           guard_to = split_edge (loop_preheader_edge (loop));
1731           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1732                                            guard_to, guard_bb,
1733                                            inverse_probability (prob_prolog));
1734           e = EDGE_PRED (guard_to, 0);
1735           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1736           slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
1737           scale_loop_profile (prolog, prob_prolog, bound_prolog);
1738         }
1739       /* Update init address of DRs.  */
1740       vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1741       /* Update niters for vector loop.  */
1742       LOOP_VINFO_NITERS (loop_vinfo)
1743         = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1744       LOOP_VINFO_NITERSM1 (loop_vinfo)
1745         = fold_build2 (MINUS_EXPR, type,
1746                        LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
1747       niters = vect_build_loop_niters (loop_vinfo);
1748
1749       /* Prolog iterates at most bound_prolog times, latch iterates at
1750          most bound_prolog - 1 times.  */
1751       record_niter_bound (prolog, bound_prolog - 1, false, true);
1752       delete_update_ssa ();
1753       adjust_vec_debug_stmts ();
1754       scev_reset ();
1755     }
1756
1757   if (epilog_peeling)
1758     {
1759       e = single_exit (loop);
1760       if (!slpeel_can_duplicate_loop_p (loop, e))
1761         {
1762           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1763                            "loop can't be duplicated to exit edge.\n");
1764           gcc_unreachable ();
1765         }
1766       /* Peel epilog and put it on exit edge of loop.  */
1767       epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1768       if (!epilog)
1769         {
1770           dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1771                            "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1772           gcc_unreachable ();
1773         }
1774       slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1775
1776       /* Scalar version loop may be preferred.  In this case, add guard
1777          and skip to epilog.  Note this only happens when the number of
1778          iterations of loop is unknown at compile time, otherwise this
1779          won't be vectorized.  */
1780       if (skip_vector)
1781         {
1782           /* Additional epilogue iteration is peeled if gap exists.  */
1783           bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
1784           tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
1785                                                 bound_prolog,
1786                                                 peel_for_gaps ? vf : vf - 1,
1787                                                 th, &bound_scalar,
1788                                                 check_profitability);
1789           /* Build guard against NITERSM1 since NITERS may overflow.  */
1790           guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
1791           guard_bb = anchor;
1792           guard_to = split_edge (loop_preheader_edge (epilog));
1793           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1794                                            guard_to, guard_bb,
1795                                            inverse_probability (prob_vector));
1796           e = EDGE_PRED (guard_to, 0);
1797           e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1798           slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
1799           scale_loop_profile (epilog, prob_vector, bound_scalar);
1800         }
1801
1802       tree niters_vector_mult_vf;
1803       /* If loop is peeled for non-zero constant times, now niters refers to
1804          orig_niters - prolog_peeling, it won't overflow even the orig_niters
1805          overflows.  */
1806       niters_no_overflow |= (prolog_peeling > 0);
1807       vect_gen_vector_loop_niters (loop_vinfo, niters,
1808                                    niters_vector, niters_no_overflow);
1809       vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1810                                            &niters_vector_mult_vf);
1811       /* Update IVs of original loop as if they were advanced by
1812          niters_vector_mult_vf steps.  */
1813       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1814       edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1815       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1816                                         update_e);
1817
1818       if (skip_epilog)
1819         {
1820           guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1821                                     niters, niters_vector_mult_vf);
1822           guard_bb = single_exit (loop)->dest;
1823           guard_to = split_edge (single_exit (epilog));
1824           guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1825                                            skip_vector ? anchor : guard_bb,
1826                                            inverse_probability (prob_epilog));
1827           slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1828                                               single_exit (epilog));
1829           scale_loop_profile (epilog, prob_epilog, bound);
1830         }
1831       else
1832         slpeel_update_phi_nodes_for_lcssa (epilog);
1833
1834       bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
1835       /* We share epilog loop with scalar version loop.  */
1836       bound = MAX (bound, bound_scalar - 1);
1837       record_niter_bound (epilog, bound, false, true);
1838
1839       delete_update_ssa ();
1840       adjust_vec_debug_stmts ();
1841       scev_reset ();
1842     }
1843   adjust_vec.release ();
1844   free_original_copy_tables ();
1845
1846   return epilog;
1847 }
1848
1849 /* Function vect_create_cond_for_niters_checks.
1850
1851    Create a conditional expression that represents the run-time checks for
1852    loop's niter.  The loop is guaranteed to to terminate if the run-time
1853    checks hold.
1854
1855    Input:
1856    COND_EXPR  - input conditional expression.  New conditions will be chained
1857                 with logical AND operation.  If it is NULL, then the function
1858                 is used to return the number of alias checks.
1859    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1860                 to be checked.
1861
1862    Output:
1863    COND_EXPR - conditional expression.
1864
1865    The returned COND_EXPR is the conditional expression to be used in the
1866    if statement that controls which version of the loop gets executed at
1867    runtime.  */
1868
1869 static void
1870 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1871 {
1872   tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1873
1874   if (*cond_expr)
1875     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1876                               *cond_expr, part_cond_expr);
1877   else
1878     *cond_expr = part_cond_expr;
1879 }
1880
1881 /* Function vect_create_cond_for_align_checks.
1882
1883    Create a conditional expression that represents the alignment checks for
1884    all of data references (array element references) whose alignment must be
1885    checked at runtime.
1886
1887    Input:
1888    COND_EXPR  - input conditional expression.  New conditions will be chained
1889                 with logical AND operation.
1890    LOOP_VINFO - two fields of the loop information are used.
1891                 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1892                 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1893
1894    Output:
1895    COND_EXPR_STMT_LIST - statements needed to construct the conditional
1896                          expression.
1897    The returned value is the conditional expression to be used in the if
1898    statement that controls which version of the loop gets executed at runtime.
1899
1900    The algorithm makes two assumptions:
1901      1) The number of bytes "n" in a vector is a power of 2.
1902      2) An address "a" is aligned if a%n is zero and that this
1903         test can be done as a&(n-1) == 0.  For example, for 16
1904         byte vectors the test is a&0xf == 0.  */
1905
1906 static void
1907 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1908                                    tree *cond_expr,
1909                                    gimple_seq *cond_expr_stmt_list)
1910 {
1911   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1912   vec<gimple *> may_misalign_stmts
1913     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1914   gimple *ref_stmt;
1915   int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1916   tree mask_cst;
1917   unsigned int i;
1918   tree int_ptrsize_type;
1919   char tmp_name[20];
1920   tree or_tmp_name = NULL_TREE;
1921   tree and_tmp_name;
1922   gimple *and_stmt;
1923   tree ptrsize_zero;
1924   tree part_cond_expr;
1925
1926   /* Check that mask is one less than a power of 2, i.e., mask is
1927      all zeros followed by all ones.  */
1928   gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1929
1930   int_ptrsize_type = signed_type_for (ptr_type_node);
1931
1932   /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
1933      of the first vector of the i'th data reference. */
1934
1935   FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
1936     {
1937       gimple_seq new_stmt_list = NULL;
1938       tree addr_base;
1939       tree addr_tmp_name;
1940       tree new_or_tmp_name;
1941       gimple *addr_stmt, *or_stmt;
1942       stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
1943       tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1944       bool negative = tree_int_cst_compare
1945         (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
1946       tree offset = negative
1947         ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
1948
1949       /* create: addr_tmp = (int)(address_of_first_vector) */
1950       addr_base =
1951         vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
1952                                               offset, loop);
1953       if (new_stmt_list != NULL)
1954         gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
1955
1956       sprintf (tmp_name, "addr2int%d", i);
1957       addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1958       addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
1959       gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
1960
1961       /* The addresses are OR together.  */
1962
1963       if (or_tmp_name != NULL_TREE)
1964         {
1965           /* create: or_tmp = or_tmp | addr_tmp */
1966           sprintf (tmp_name, "orptrs%d", i);
1967           new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
1968           or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
1969                                          or_tmp_name, addr_tmp_name);
1970           gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
1971           or_tmp_name = new_or_tmp_name;
1972         }
1973       else
1974         or_tmp_name = addr_tmp_name;
1975
1976     } /* end for i */
1977
1978   mask_cst = build_int_cst (int_ptrsize_type, mask);
1979
1980   /* create: and_tmp = or_tmp & mask  */
1981   and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
1982
1983   and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
1984                                   or_tmp_name, mask_cst);
1985   gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
1986
1987   /* Make and_tmp the left operand of the conditional test against zero.
1988      if and_tmp has a nonzero bit then some address is unaligned.  */
1989   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
1990   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
1991                                 and_tmp_name, ptrsize_zero);
1992   if (*cond_expr)
1993     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1994                               *cond_expr, part_cond_expr);
1995   else
1996     *cond_expr = part_cond_expr;
1997 }
1998
1999 /* Given two data references and segment lengths described by DR_A and DR_B,
2000    create expression checking if the two addresses ranges intersect with
2001    each other based on index of the two addresses.  This can only be done
2002    if DR_A and DR_B referring to the same (array) object and the index is
2003    the only difference.  For example:
2004
2005                        DR_A                           DR_B
2006       data-ref         arr[i]                         arr[j]
2007       base_object      arr                            arr
2008       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2009
2010    The addresses and their index are like:
2011
2012         |<- ADDR_A    ->|          |<- ADDR_B    ->|
2013      ------------------------------------------------------->
2014         |   |   |   |   |          |   |   |   |   |
2015      ------------------------------------------------------->
2016         i_0 ...         i_0+4      j_0 ...         j_0+4
2017
2018    We can create expression based on index rather than address:
2019
2020      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
2021
2022    Note evolution step of index needs to be considered in comparison.  */
2023
2024 static bool
2025 create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
2026                                      const dr_with_seg_len& dr_a,
2027                                      const dr_with_seg_len& dr_b)
2028 {
2029   if (integer_zerop (DR_STEP (dr_a.dr))
2030       || integer_zerop (DR_STEP (dr_b.dr))
2031       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2032     return false;
2033
2034   if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len))
2035     return false;
2036
2037   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2038     return false;
2039
2040   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2041     return false;
2042
2043   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2044     return false;
2045
2046   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2047
2048   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2049   unsigned HOST_WIDE_INT abs_step
2050     = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr)));
2051
2052   unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len);
2053   unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len);
2054   /* Infer the number of iterations with which the memory segment is accessed
2055      by DR.  In other words, alias is checked if memory segment accessed by
2056      DR_A in some iterations intersect with memory segment accessed by DR_B
2057      in the same amount iterations.
2058      Note segnment length is a linear function of number of iterations with
2059      DR_STEP as the coefficient.  */
2060   unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step;
2061   unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step;
2062
2063   unsigned int i;
2064   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2065   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2066     {
2067       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2068       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2069       /* Two indices must be the same if they are not scev, or not scev wrto
2070          current loop being vecorized.  */
2071       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2072           || TREE_CODE (access2) != POLYNOMIAL_CHREC
2073           || CHREC_VARIABLE (access1) != (unsigned)loop->num
2074           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2075         {
2076           if (operand_equal_p (access1, access2, 0))
2077             continue;
2078
2079           return false;
2080         }
2081       /* The two indices must have the same step.  */
2082       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2083         return false;
2084
2085       tree idx_step = CHREC_RIGHT (access1);
2086       /* Index must have const step, otherwise DR_STEP won't be constant.  */
2087       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2088       /* Index must evaluate in the same direction as DR.  */
2089       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2090
2091       tree min1 = CHREC_LEFT (access1);
2092       tree min2 = CHREC_LEFT (access2);
2093       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2094         return false;
2095
2096       /* Ideally, alias can be checked against loop's control IV, but we
2097          need to prove linear mapping between control IV and reference
2098          index.  Although that should be true, we check against (array)
2099          index of data reference.  Like segment length, index length is
2100          linear function of the number of iterations with index_step as
2101          the coefficient, i.e, niter_len * idx_step.  */
2102       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
2103                                    build_int_cst (TREE_TYPE (min1),
2104                                                   niter_len1));
2105       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
2106                                    build_int_cst (TREE_TYPE (min2),
2107                                                   niter_len2));
2108       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
2109       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
2110       /* Adjust ranges for negative step.  */
2111       if (neg_step)
2112         {
2113           min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step);
2114           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1),
2115                               CHREC_LEFT (access1), idx_step);
2116           min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step);
2117           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2),
2118                               CHREC_LEFT (access2), idx_step);
2119         }
2120       tree part_cond_expr
2121         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2122             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
2123             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
2124       if (*cond_expr)
2125         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2126                                   *cond_expr, part_cond_expr);
2127       else
2128         *cond_expr = part_cond_expr;
2129     }
2130   return true;
2131 }
2132
2133 /* Given two data references and segment lengths described by DR_A and DR_B,
2134    create expression checking if the two addresses ranges intersect with
2135    each other:
2136
2137      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
2138      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
2139
2140 static void
2141 create_intersect_range_checks (loop_vec_info loop_vinfo, tree *cond_expr,
2142                                const dr_with_seg_len& dr_a,
2143                                const dr_with_seg_len& dr_b)
2144 {
2145   *cond_expr = NULL_TREE;
2146   if (create_intersect_range_checks_index (loop_vinfo, cond_expr, dr_a, dr_b))
2147     return;
2148
2149   tree segment_length_a = dr_a.seg_len;
2150   tree segment_length_b = dr_b.seg_len;
2151   tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2152   tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2153   tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2154
2155   offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2156                           offset_a, DR_INIT (dr_a.dr));
2157   offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2158                           offset_b, DR_INIT (dr_b.dr));
2159   addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2160   addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
2161
2162   tree seg_a_min = addr_base_a;
2163   tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2164   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2165      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2166      [a, a+12) */
2167   if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
2168     {
2169       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2170       seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2171       seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2172     }
2173
2174   tree seg_b_min = addr_base_b;
2175   tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2176   if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
2177     {
2178       tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2179       seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2180       seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2181     }
2182   *cond_expr
2183     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2184         fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2185         fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2186 }
2187
2188 /* Function vect_create_cond_for_alias_checks.
2189
2190    Create a conditional expression that represents the run-time checks for
2191    overlapping of address ranges represented by a list of data references
2192    relations passed as input.
2193
2194    Input:
2195    COND_EXPR  - input conditional expression.  New conditions will be chained
2196                 with logical AND operation.  If it is NULL, then the function
2197                 is used to return the number of alias checks.
2198    LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2199                 to be checked.
2200
2201    Output:
2202    COND_EXPR - conditional expression.
2203
2204    The returned COND_EXPR is the conditional expression to be used in the if
2205    statement that controls which version of the loop gets executed at runtime.
2206 */
2207
2208 void
2209 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
2210 {
2211   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
2212     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2213   tree part_cond_expr;
2214
2215   if (comp_alias_ddrs.is_empty ())
2216     return;
2217
2218   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
2219     {
2220       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2221       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
2222
2223       if (dump_enabled_p ())
2224         {
2225           dump_printf_loc (MSG_NOTE, vect_location,
2226                            "create runtime check for data references ");
2227           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
2228           dump_printf (MSG_NOTE, " and ");
2229           dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2230           dump_printf (MSG_NOTE, "\n");
2231         }
2232
2233       /* Create condition expression for each pair data references.  */
2234       create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
2235       if (*cond_expr)
2236         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2237                                   *cond_expr, part_cond_expr);
2238       else
2239         *cond_expr = part_cond_expr;
2240     }
2241
2242   if (dump_enabled_p ())
2243     dump_printf_loc (MSG_NOTE, vect_location,
2244                      "created %u versioning for alias checks.\n",
2245                      comp_alias_ddrs.length ());
2246 }
2247
2248
2249 /* Function vect_loop_versioning.
2250
2251    If the loop has data references that may or may not be aligned or/and
2252    has data reference relations whose independence was not proven then
2253    two versions of the loop need to be generated, one which is vectorized
2254    and one which isn't.  A test is then generated to control which of the
2255    loops is executed.  The test checks for the alignment of all of the
2256    data references that may or may not be aligned.  An additional
2257    sequence of runtime tests is generated for each pairs of DDRs whose
2258    independence was not proven.  The vectorized version of loop is
2259    executed only if both alias and alignment tests are passed.
2260
2261    The test generated to check which version of loop is executed
2262    is modified to also check for profitability as indicated by the
2263    cost model threshold TH.
2264
2265    The versioning precondition(s) are placed in *COND_EXPR and
2266    *COND_EXPR_STMT_LIST.  */
2267
2268 void
2269 vect_loop_versioning (loop_vec_info loop_vinfo,
2270                       unsigned int th, bool check_profitability)
2271 {
2272   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
2273   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2274   basic_block condition_bb;
2275   gphi_iterator gsi;
2276   gimple_stmt_iterator cond_exp_gsi;
2277   basic_block merge_bb;
2278   basic_block new_exit_bb;
2279   edge new_exit_e, e;
2280   gphi *orig_phi, *new_phi;
2281   tree cond_expr = NULL_TREE;
2282   gimple_seq cond_expr_stmt_list = NULL;
2283   tree arg;
2284   unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2285   gimple_seq gimplify_stmt_list = NULL;
2286   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2287   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2288   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
2289   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
2290
2291   if (check_profitability)
2292     cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2293                              build_int_cst (TREE_TYPE (scalar_loop_iters),
2294                                                        th));
2295
2296   if (version_niter)
2297     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2298
2299   if (cond_expr)
2300     cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2301                                         is_gimple_condexpr, NULL_TREE);
2302
2303   if (version_align)
2304     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2305                                        &cond_expr_stmt_list);
2306
2307   if (version_alias)
2308     vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
2309
2310   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2311                                       is_gimple_condexpr, NULL_TREE);
2312   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
2313
2314   initialize_original_copy_tables ();
2315   if (scalar_loop)
2316     {
2317       edge scalar_e;
2318       basic_block preheader, scalar_preheader;
2319
2320       /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2321          scale LOOP's frequencies instead.  */
2322       nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2323                             REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2324       scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2325       /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2326          while we need to move it above LOOP's preheader.  */
2327       e = loop_preheader_edge (loop);
2328       scalar_e = loop_preheader_edge (scalar_loop);
2329       gcc_assert (empty_block_p (e->src)
2330                   && single_pred_p (e->src));
2331       gcc_assert (empty_block_p (scalar_e->src)
2332                   && single_pred_p (scalar_e->src));
2333       gcc_assert (single_pred_p (condition_bb));
2334       preheader = e->src;
2335       scalar_preheader = scalar_e->src;
2336       scalar_e = find_edge (condition_bb, scalar_preheader);
2337       e = single_pred_edge (preheader);
2338       redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2339                                       scalar_preheader);
2340       redirect_edge_and_branch_force (scalar_e, preheader);
2341       redirect_edge_and_branch_force (e, condition_bb);
2342       set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2343                                single_pred (condition_bb));
2344       set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2345                                single_pred (scalar_preheader));
2346       set_immediate_dominator (CDI_DOMINATORS, preheader,
2347                                condition_bb);
2348     }
2349   else
2350     nloop = loop_version (loop, cond_expr, &condition_bb,
2351                           prob, prob, REG_BR_PROB_BASE - prob, true);
2352
2353   if (version_niter)
2354     {
2355       /* The versioned loop could be infinite, we need to clear existing
2356          niter information which is copied from the original loop.  */
2357       gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2358       vect_free_loop_info_assumptions (nloop);
2359       /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
2360       loop_constraint_set (loop, LOOP_C_INFINITE);
2361     }
2362
2363   if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
2364       && dump_enabled_p ())
2365     {
2366       if (version_alias)
2367         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2368                          "loop versioned for vectorization because of "
2369                          "possible aliasing\n");
2370       if (version_align)
2371         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2372                          "loop versioned for vectorization to enhance "
2373                          "alignment\n");
2374
2375     }
2376   free_original_copy_tables ();
2377
2378   /* Loop versioning violates an assumption we try to maintain during
2379      vectorization - that the loop exit block has a single predecessor.
2380      After versioning, the exit block of both loop versions is the same
2381      basic block (i.e. it has two predecessors). Just in order to simplify
2382      following transformations in the vectorizer, we fix this situation
2383      here by adding a new (empty) block on the exit-edge of the loop,
2384      with the proper loop-exit phis to maintain loop-closed-form.
2385      If loop versioning wasn't done from loop, but scalar_loop instead,
2386      merge_bb will have already just a single successor.  */
2387
2388   merge_bb = single_exit (loop)->dest;
2389   if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
2390     {
2391       gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2392       new_exit_bb = split_edge (single_exit (loop));
2393       new_exit_e = single_exit (loop);
2394       e = EDGE_SUCC (new_exit_bb, 0);
2395
2396       for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2397         {
2398           tree new_res;
2399           orig_phi = gsi.phi ();
2400           new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2401           new_phi = create_phi_node (new_res, new_exit_bb);
2402           arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2403           add_phi_arg (new_phi, arg, new_exit_e,
2404                        gimple_phi_arg_location_from_edge (orig_phi, e));
2405           adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2406         }
2407     }
2408
2409   /* End loop-exit-fixes after versioning.  */
2410
2411   if (cond_expr_stmt_list)
2412     {
2413       cond_exp_gsi = gsi_last_bb (condition_bb);
2414       gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
2415                              GSI_SAME_STMT);
2416     }
2417   update_ssa (TODO_update_ssa);
2418 }