gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "tree-ssa.h"
  32 #include "tree-pass.h"
  33 #include "cfgloop.h"
  34 #include "expr.h"
  35 #include "recog.h"
  36 #include "optabs.h"
  37 #include "params.h"
  38 #include "diagnostic-core.h"
  39 #include "tree-chrec.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-vectorizer.h"
  42 #include "target.h"
  43
  44 /* Loop Vectorization Pass.
  45
  46    This pass tries to vectorize loops.
  47
  48    For example, the vectorizer transforms the following simple loop:
  49
  50         short a[N]; short b[N]; short c[N]; int i;
  51
  52         for (i=0; i<N; i++){
  53           a[i] = b[i] + c[i];
  54         }
  55
  56    as if it was manually vectorized by rewriting the source code into:
  57
  58         typedef int __attribute__((mode(V8HI))) v8hi;
  59         short a[N];  short b[N]; short c[N];   int i;
  60         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  61         v8hi va, vb, vc;
  62
  63         for (i=0; i<N/8; i++){
  64           vb = pb[i];
  65           vc = pc[i];
  66           va = vb + vc;
  67           pa[i] = va;
  68         }
  69
  70         The main entry to this pass is vectorize_loops(), in which
  71    the vectorizer applies a set of analyses on a given set of loops,
  72    followed by the actual vectorization transformation for the loops that
  73    had successfully passed the analysis phase.
  74         Throughout this pass we make a distinction between two types of
  75    data: scalars (which are represented by SSA_NAMES), and memory references
  76    ("data-refs").  These two types of data require different handling both
  77    during analysis and transformation. The types of data-refs that the
  78    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  79    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  80    accesses are required to have a simple (consecutive) access pattern.
  81
  82    Analysis phase:
  83    ===============
  84         The driver for the analysis phase is vect_analyze_loop().
  85    It applies a set of analyses, some of which rely on the scalar evolution
  86    analyzer (scev) developed by Sebastian Pop.
  87
  88         During the analysis phase the vectorizer records some information
  89    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  90    loop, as well as general information about the loop as a whole, which is
  91    recorded in a "loop_vec_info" struct attached to each loop.
  92
  93    Transformation phase:
  94    =====================
  95         The loop transformation phase scans all the stmts in the loop, and
  96    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  97    the loop that needs to be vectorized.  It inserts the vector code sequence
  98    just before the scalar stmt S, and records a pointer to the vector code
  99    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 100    attached to S).  This pointer will be used for the vectorization of following
 101    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 102    otherwise, we rely on dead code elimination for removing it.
 103
 104         For example, say stmt S1 was vectorized into stmt VS1:
 105
 106    VS1: vb = px[i];
 107    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 108    S2:  a = b;
 109
 110    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 111    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 112    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 113    resulting sequence would be:
 114
 115    VS1: vb = px[i];
 116    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 117    VS2: va = vb;
 118    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 119
 120         Operands that are not SSA_NAMEs, are data-refs that appear in
 121    load/store operations (like 'x[i]' in S1), and are handled differently.
 122
 123    Target modeling:
 124    =================
 125         Currently the only target specific information that is used is the
 126    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 127    Targets that can support different sizes of vectors, for now will need
 128    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 129    flexibility will be added in the future.
 130
 131         Since we only vectorize operations which vector form can be
 132    expressed using existing tree codes, to verify that an operation is
 133    supported, the vectorizer checks the relevant optab at the relevant
 134    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
 135    the value found is CODE_FOR_nothing, then there's no target support, and
 136    we can't vectorize the stmt.
 137
 138    For additional information on this project see:
 139    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 140 */
 141
 142 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 143
 144 /* Function vect_determine_vectorization_factor
 145
 146    Determine the vectorization factor (VF).  VF is the number of data elements
 147    that are operated upon in parallel in a single iteration of the vectorized
 148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 150    elements can fit in a single vector register.
 151
 152    We currently support vectorization of loops in which all types operated upon
 153    are of the same size.  Therefore this function currently sets VF according to
 154    the size of the types operated upon, and fails if there are multiple sizes
 155    in the loop.
 156
 157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 158    original loop:
 159         for (i=0; i<N; i++){
 160           a[i] = b[i] + c[i];
 161         }
 162
 163    vectorized loop:
 164         for (i=0; i<N; i+=VF){
 165           a[i:VF] = b[i:VF] + c[i:VF];
 166         }
 167 */
 168
 169 static bool
 170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 171 {
 172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 174   int nbbs = loop->num_nodes;
 175   gimple_stmt_iterator si;
 176   unsigned int vectorization_factor = 0;
 177   tree scalar_type;
 178   gimple phi;
 179   tree vectype;
 180   unsigned int nunits;
 181   stmt_vec_info stmt_info;
 182   int i;
 183   HOST_WIDE_INT dummy;
 184   gimple stmt, pattern_stmt = NULL;
 185   gimple_seq pattern_def_seq = NULL;
 186   gimple_stmt_iterator pattern_def_si = gsi_none ();
 187   bool analyze_pattern_stmt = false;
 188
 189   if (dump_enabled_p ())
 190     dump_printf_loc (MSG_NOTE, vect_location,
 191                      "=== vect_determine_vectorization_factor ===\n");
 192
 193   for (i = 0; i < nbbs; i++)
 194     {
 195       basic_block bb = bbs[i];
 196
 197       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 198         {
 199           phi = gsi_stmt (si);
 200           stmt_info = vinfo_for_stmt (phi);
 201           if (dump_enabled_p ())
 202             {
 203               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 204               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 205               dump_printf (MSG_NOTE, "\n");
 206             }
 207
 208           gcc_assert (stmt_info);
 209
 210           if (STMT_VINFO_RELEVANT_P (stmt_info))
 211             {
 212               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 213               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 214
 215               if (dump_enabled_p ())
 216                 {
 217                   dump_printf_loc (MSG_NOTE, vect_location,
 218                                    "get vectype for scalar type:  ");
 219                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 220                   dump_printf (MSG_NOTE, "\n");
 221                 }
 222
 223               vectype = get_vectype_for_scalar_type (scalar_type);
 224               if (!vectype)
 225                 {
 226                   if (dump_enabled_p ())
 227                     {
 228                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 229                                        "not vectorized: unsupported "
 230                                        "data-type ");
 231                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 232                                          scalar_type);
 233                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 234                     }
 235                   return false;
 236                 }
 237               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 238
 239               if (dump_enabled_p ())
 240                 {
 241                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 242                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 243                   dump_printf (MSG_NOTE, "\n");
 244                 }
 245
 246               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 247               if (dump_enabled_p ())
 248                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
 249                                  nunits);
 250
 251               if (!vectorization_factor
 252                   || (nunits > vectorization_factor))
 253                 vectorization_factor = nunits;
 254             }
 255         }
 256
 257       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
 258         {
 259           tree vf_vectype;
 260
 261           if (analyze_pattern_stmt)
 262             stmt = pattern_stmt;
 263           else
 264             stmt = gsi_stmt (si);
 265
 266           stmt_info = vinfo_for_stmt (stmt);
 267
 268           if (dump_enabled_p ())
 269             {
 270               dump_printf_loc (MSG_NOTE, vect_location,
 271                                "==> examining statement: ");
 272               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 273               dump_printf (MSG_NOTE, "\n");
 274             }
 275
 276           gcc_assert (stmt_info);
 277
 278           /* Skip stmts which do not need to be vectorized.  */
 279           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
 280                && !STMT_VINFO_LIVE_P (stmt_info))
 281               || gimple_clobber_p (stmt))
 282             {
 283               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 284                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 285                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 286                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 287                 {
 288                   stmt = pattern_stmt;
 289                   stmt_info = vinfo_for_stmt (pattern_stmt);
 290                   if (dump_enabled_p ())
 291                     {
 292                       dump_printf_loc (MSG_NOTE, vect_location,
 293                                        "==> examining pattern statement: ");
 294                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 295                       dump_printf (MSG_NOTE, "\n");
 296                     }
 297                 }
 298               else
 299                 {
 300                   if (dump_enabled_p ())
 301                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
 302                   gsi_next (&si);
 303                   continue;
 304                 }
 305             }
 306           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 307                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 308                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 309                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 310             analyze_pattern_stmt = true;
 311
 312           /* If a pattern statement has def stmts, analyze them too.  */
 313           if (is_pattern_stmt_p (stmt_info))
 314             {
 315               if (pattern_def_seq == NULL)
 316                 {
 317                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 318                   pattern_def_si = gsi_start (pattern_def_seq);
 319                 }
 320               else if (!gsi_end_p (pattern_def_si))
 321                 gsi_next (&pattern_def_si);
 322               if (pattern_def_seq != NULL)
 323                 {
 324                   gimple pattern_def_stmt = NULL;
 325                   stmt_vec_info pattern_def_stmt_info = NULL;
 326
 327                   while (!gsi_end_p (pattern_def_si))
 328                     {
 329                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 330                       pattern_def_stmt_info
 331                         = vinfo_for_stmt (pattern_def_stmt);
 332                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 333                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 334                         break;
 335                       gsi_next (&pattern_def_si);
 336                     }
 337
 338                   if (!gsi_end_p (pattern_def_si))
 339                     {
 340                       if (dump_enabled_p ())
 341                         {
 342                           dump_printf_loc (MSG_NOTE, vect_location,
 343                                            "==> examining pattern def stmt: ");
 344                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 345                                             pattern_def_stmt, 0);
 346                           dump_printf (MSG_NOTE, "\n");
 347                         }
 348
 349                       stmt = pattern_def_stmt;
 350                       stmt_info = pattern_def_stmt_info;
 351                     }
 352                   else
 353                     {
 354                       pattern_def_si = gsi_none ();
 355                       analyze_pattern_stmt = false;
 356                     }
 357                 }
 358               else
 359                 analyze_pattern_stmt = false;
 360             }
 361
 362           if (gimple_get_lhs (stmt) == NULL_TREE)
 363             {
 364               if (dump_enabled_p ())
 365                 {
 366                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 367                                    "not vectorized: irregular stmt.");
 368                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 369                                     0);
 370                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 371                 }
 372               return false;
 373             }
 374
 375           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 376             {
 377               if (dump_enabled_p ())
 378                 {
 379                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 380                                    "not vectorized: vector stmt in loop:");
 381                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 382                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 383                 }
 384               return false;
 385             }
 386
 387           if (STMT_VINFO_VECTYPE (stmt_info))
 388             {
 389               /* The only case when a vectype had been already set is for stmts
 390                  that contain a dataref, or for "pattern-stmts" (stmts
 391                  generated by the vectorizer to represent/replace a certain
 392                  idiom).  */
 393               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 394                           || is_pattern_stmt_p (stmt_info)
 395                           || !gsi_end_p (pattern_def_si));
 396               vectype = STMT_VINFO_VECTYPE (stmt_info);
 397             }
 398           else
 399             {
 400               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 401               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 402               if (dump_enabled_p ())
 403                 {
 404                   dump_printf_loc (MSG_NOTE, vect_location,
 405                                    "get vectype for scalar type:  ");
 406                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 407                   dump_printf (MSG_NOTE, "\n");
 408                 }
 409               vectype = get_vectype_for_scalar_type (scalar_type);
 410               if (!vectype)
 411                 {
 412                   if (dump_enabled_p ())
 413                     {
 414                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 415                                        "not vectorized: unsupported "
 416                                        "data-type ");
 417                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 418                                          scalar_type);
 419                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 420                     }
 421                   return false;
 422                 }
 423
 424               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 425
 426               if (dump_enabled_p ())
 427                 {
 428                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 429                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 430                   dump_printf (MSG_NOTE, "\n");
 431                 }
 432             }
 433
 434           /* The vectorization factor is according to the smallest
 435              scalar type (or the largest vector size, but we only
 436              support one vector size per loop).  */
 437           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 438                                                        &dummy);
 439           if (dump_enabled_p ())
 440             {
 441               dump_printf_loc (MSG_NOTE, vect_location,
 442                                "get vectype for scalar type:  ");
 443               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 444               dump_printf (MSG_NOTE, "\n");
 445             }
 446           vf_vectype = get_vectype_for_scalar_type (scalar_type);
 447           if (!vf_vectype)
 448             {
 449               if (dump_enabled_p ())
 450                 {
 451                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 452                                    "not vectorized: unsupported data-type ");
 453                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 454                                      scalar_type);
 455                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 456                 }
 457               return false;
 458             }
 459
 460           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 461                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 462             {
 463               if (dump_enabled_p ())
 464                 {
 465                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 466                                    "not vectorized: different sized vector "
 467                                    "types in statement, ");
 468                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 469                                      vectype);
 470                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 471                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 472                                      vf_vectype);
 473                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 474                 }
 475               return false;
 476             }
 477
 478           if (dump_enabled_p ())
 479             {
 480               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 481               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 482               dump_printf (MSG_NOTE, "\n");
 483             }
 484
 485           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 486           if (dump_enabled_p ())
 487             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
 488           if (!vectorization_factor
 489               || (nunits > vectorization_factor))
 490             vectorization_factor = nunits;
 491
 492           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 493             {
 494               pattern_def_seq = NULL;
 495               gsi_next (&si);
 496             }
 497         }
 498     }
 499
 500   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 501   if (dump_enabled_p ())
 502     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
 503                      vectorization_factor);
 504   if (vectorization_factor <= 1)
 505     {
 506       if (dump_enabled_p ())
 507         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 508                          "not vectorized: unsupported data-type\n");
 509       return false;
 510     }
 511   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 512
 513   return true;
 514 }
 515
 516
 517 /* Function vect_is_simple_iv_evolution.
 518
 519    FORNOW: A simple evolution of an induction variables in the loop is
 520    considered a polynomial evolution.  */
 521
 522 static bool
 523 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 524                              tree * step)
 525 {
 526   tree init_expr;
 527   tree step_expr;
 528   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 529   basic_block bb;
 530
 531   /* When there is no evolution in this loop, the evolution function
 532      is not "simple".  */
 533   if (evolution_part == NULL_TREE)
 534     return false;
 535
 536   /* When the evolution is a polynomial of degree >= 2
 537      the evolution function is not "simple".  */
 538   if (tree_is_chrec (evolution_part))
 539     return false;
 540
 541   step_expr = evolution_part;
 542   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 543
 544   if (dump_enabled_p ())
 545     {
 546       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 547       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 548       dump_printf (MSG_NOTE, ",  init: ");
 549       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 550       dump_printf (MSG_NOTE, "\n");
 551     }
 552
 553   *init = init_expr;
 554   *step = step_expr;
 555
 556   if (TREE_CODE (step_expr) != INTEGER_CST
 557       && (TREE_CODE (step_expr) != SSA_NAME
 558           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 559               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 560           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 561               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 562                   || !flag_associative_math)))
 563       && (TREE_CODE (step_expr) != REAL_CST
 564           || !flag_associative_math))
 565     {
 566       if (dump_enabled_p ())
 567         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 568                          "step unknown.\n");
 569       return false;
 570     }
 571
 572   return true;
 573 }
 574
 575 /* Function vect_analyze_scalar_cycles_1.
 576
 577    Examine the cross iteration def-use cycles of scalar variables
 578    in LOOP.  LOOP_VINFO represents the loop that is now being
 579    considered for vectorization (can be LOOP, or an outer-loop
 580    enclosing LOOP).  */
 581
 582 static void
 583 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 584 {
 585   basic_block bb = loop->header;
 586   tree init, step;
 587   vec<gimple> worklist;
 588   worklist.create (64);
 589   gimple_stmt_iterator gsi;
 590   bool double_reduc;
 591
 592   if (dump_enabled_p ())
 593     dump_printf_loc (MSG_NOTE, vect_location,
 594                      "=== vect_analyze_scalar_cycles ===\n");
 595
 596   /* First - identify all inductions.  Reduction detection assumes that all the
 597      inductions have been identified, therefore, this order must not be
 598      changed.  */
 599   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 600     {
 601       gimple phi = gsi_stmt (gsi);
 602       tree access_fn = NULL;
 603       tree def = PHI_RESULT (phi);
 604       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 605
 606       if (dump_enabled_p ())
 607         {
 608           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 609           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 610           dump_printf (MSG_NOTE, "\n");
 611         }
 612
 613       /* Skip virtual phi's.  The data dependences that are associated with
 614          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 615       if (virtual_operand_p (def))
 616         continue;
 617
 618       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 619
 620       /* Analyze the evolution function.  */
 621       access_fn = analyze_scalar_evolution (loop, def);
 622       if (access_fn)
 623         {
 624           STRIP_NOPS (access_fn);
 625           if (dump_enabled_p ())
 626             {
 627               dump_printf_loc (MSG_NOTE, vect_location,
 628                                "Access function of PHI: ");
 629               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 630               dump_printf (MSG_NOTE, "\n");
 631             }
 632           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 633             = evolution_part_in_loop_num (access_fn, loop->num);
 634         }
 635
 636       if (!access_fn
 637           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 638           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 639               && TREE_CODE (step) != INTEGER_CST))
 640         {
 641           worklist.safe_push (phi);
 642           continue;
 643         }
 644
 645       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 646
 647       if (dump_enabled_p ())
 648         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 649       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 650     }
 651
 652
 653   /* Second - identify all reductions and nested cycles.  */
 654   while (worklist.length () > 0)
 655     {
 656       gimple phi = worklist.pop ();
 657       tree def = PHI_RESULT (phi);
 658       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 659       gimple reduc_stmt;
 660       bool nested_cycle;
 661
 662       if (dump_enabled_p ())
 663         {
 664           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 665           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 666           dump_printf (MSG_NOTE, "\n");
 667         }
 668
 669       gcc_assert (!virtual_operand_p (def)
 670                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 671
 672       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 673       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 674                                                 &double_reduc);
 675       if (reduc_stmt)
 676         {
 677           if (double_reduc)
 678             {
 679               if (dump_enabled_p ())
 680                 dump_printf_loc (MSG_NOTE, vect_location,
 681                                  "Detected double reduction.\n");
 682
 683               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 684               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 685                                                     vect_double_reduction_def;
 686             }
 687           else
 688             {
 689               if (nested_cycle)
 690                 {
 691                   if (dump_enabled_p ())
 692                     dump_printf_loc (MSG_NOTE, vect_location,
 693                                      "Detected vectorizable nested cycle.\n");
 694
 695                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 696                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 697                                                              vect_nested_cycle;
 698                 }
 699               else
 700                 {
 701                   if (dump_enabled_p ())
 702                     dump_printf_loc (MSG_NOTE, vect_location,
 703                                      "Detected reduction.\n");
 704
 705                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 706                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 707                                                            vect_reduction_def;
 708                   /* Store the reduction cycles for possible vectorization in
 709                      loop-aware SLP.  */
 710                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 711                 }
 712             }
 713         }
 714       else
 715         if (dump_enabled_p ())
 716           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 717                            "Unknown def-use cycle pattern.\n");
 718     }
 719
 720   worklist.release ();
 721 }
 722
 723
 724 /* Function vect_analyze_scalar_cycles.
 725
 726    Examine the cross iteration def-use cycles of scalar variables, by
 727    analyzing the loop-header PHIs of scalar variables.  Classify each
 728    cycle as one of the following: invariant, induction, reduction, unknown.
 729    We do that for the loop represented by LOOP_VINFO, and also to its
 730    inner-loop, if exists.
 731    Examples for scalar cycles:
 732
 733    Example1: reduction:
 734
 735               loop1:
 736               for (i=0; i<N; i++)
 737                  sum += a[i];
 738
 739    Example2: induction:
 740
 741               loop2:
 742               for (i=0; i<N; i++)
 743                  a[i] = i;  */
 744
 745 static void
 746 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 747 {
 748   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 749
 750   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 751
 752   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 753      Reductions in such inner-loop therefore have different properties than
 754      the reductions in the nest that gets vectorized:
 755      1. When vectorized, they are executed in the same order as in the original
 756         scalar loop, so we can't change the order of computation when
 757         vectorizing them.
 758      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 759         current checks are too strict.  */
 760
 761   if (loop->inner)
 762     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 763 }
 764
 765 /* Function vect_get_loop_niters.
 766
 767    Determine how many iterations the loop is executed.
 768    If an expression that represents the number of iterations
 769    can be constructed, place it in NUMBER_OF_ITERATIONS.
 770    Return the loop exit condition.  */
 771
 772 static gimple
 773 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 774 {
 775   tree niters;
 776
 777   if (dump_enabled_p ())
 778     dump_printf_loc (MSG_NOTE, vect_location,
 779                      "=== get_loop_niters ===\n");
 780   niters = number_of_exit_cond_executions (loop);
 781
 782   if (niters != NULL_TREE
 783       && niters != chrec_dont_know)
 784     {
 785       *number_of_iterations = niters;
 786
 787       if (dump_enabled_p ())
 788         {
 789           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
 790           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
 791           dump_printf (MSG_NOTE, "\n");
 792         }
 793     }
 794
 795   return get_loop_exit_condition (loop);
 796 }
 797
 798
 799 /* Function bb_in_loop_p
 800
 801    Used as predicate for dfs order traversal of the loop bbs.  */
 802
 803 static bool
 804 bb_in_loop_p (const_basic_block bb, const void *data)
 805 {
 806   const struct loop *const loop = (const struct loop *)data;
 807   if (flow_bb_inside_loop_p (loop, bb))
 808     return true;
 809   return false;
 810 }
 811
 812
 813 /* Function new_loop_vec_info.
 814
 815    Create and initialize a new loop_vec_info struct for LOOP, as well as
 816    stmt_vec_info structs for all the stmts in LOOP.  */
 817
 818 static loop_vec_info
 819 new_loop_vec_info (struct loop *loop)
 820 {
 821   loop_vec_info res;
 822   basic_block *bbs;
 823   gimple_stmt_iterator si;
 824   unsigned int i, nbbs;
 825
 826   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 827   LOOP_VINFO_LOOP (res) = loop;
 828
 829   bbs = get_loop_body (loop);
 830
 831   /* Create/Update stmt_info for all stmts in the loop.  */
 832   for (i = 0; i < loop->num_nodes; i++)
 833     {
 834       basic_block bb = bbs[i];
 835
 836       /* BBs in a nested inner-loop will have been already processed (because
 837          we will have called vect_analyze_loop_form for any nested inner-loop).
 838          Therefore, for stmts in an inner-loop we just want to update the
 839          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 840          loop_info of the outer-loop we are currently considering to vectorize
 841          (instead of the loop_info of the inner-loop).
 842          For stmts in other BBs we need to create a stmt_info from scratch.  */
 843       if (bb->loop_father != loop)
 844         {
 845           /* Inner-loop bb.  */
 846           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 847           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 848             {
 849               gimple phi = gsi_stmt (si);
 850               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 851               loop_vec_info inner_loop_vinfo =
 852                 STMT_VINFO_LOOP_VINFO (stmt_info);
 853               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 854               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 855             }
 856           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 857            {
 858               gimple stmt = gsi_stmt (si);
 859               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 860               loop_vec_info inner_loop_vinfo =
 861                  STMT_VINFO_LOOP_VINFO (stmt_info);
 862               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 863               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 864            }
 865         }
 866       else
 867         {
 868           /* bb in current nest.  */
 869           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 870             {
 871               gimple phi = gsi_stmt (si);
 872               gimple_set_uid (phi, 0);
 873               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 874             }
 875
 876           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 877             {
 878               gimple stmt = gsi_stmt (si);
 879               gimple_set_uid (stmt, 0);
 880               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 881             }
 882         }
 883     }
 884
 885   /* CHECKME: We want to visit all BBs before their successors (except for
 886      latch blocks, for which this assertion wouldn't hold).  In the simple
 887      case of the loop forms we allow, a dfs order of the BBs would the same
 888      as reversed postorder traversal, so we are safe.  */
 889
 890    free (bbs);
 891    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 892    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 893                               bbs, loop->num_nodes, loop);
 894    gcc_assert (nbbs == loop->num_nodes);
 895
 896   LOOP_VINFO_BBS (res) = bbs;
 897   LOOP_VINFO_NITERS (res) = NULL;
 898   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 899   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 900   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 901   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 902   LOOP_VINFO_VECT_FACTOR (res) = 0;
 903   LOOP_VINFO_LOOP_NEST (res).create (3);
 904   LOOP_VINFO_DATAREFS (res).create (10);
 905   LOOP_VINFO_DDRS (res).create (10 * 10);
 906   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 907   LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
 908              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 909   LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
 910              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 911   LOOP_VINFO_GROUPED_STORES (res).create (10);
 912   LOOP_VINFO_REDUCTIONS (res).create (10);
 913   LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
 914   LOOP_VINFO_SLP_INSTANCES (res).create (10);
 915   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 916   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
 917   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 918   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 919
 920   return res;
 921 }
 922
 923
 924 /* Function destroy_loop_vec_info.
 925
 926    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 927    stmts in the loop.  */
 928
 929 void
 930 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 931 {
 932   struct loop *loop;
 933   basic_block *bbs;
 934   int nbbs;
 935   gimple_stmt_iterator si;
 936   int j;
 937   vec<slp_instance> slp_instances;
 938   slp_instance instance;
 939   bool swapped;
 940
 941   if (!loop_vinfo)
 942     return;
 943
 944   loop = LOOP_VINFO_LOOP (loop_vinfo);
 945
 946   bbs = LOOP_VINFO_BBS (loop_vinfo);
 947   nbbs = clean_stmts ? loop->num_nodes : 0;
 948   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
 949
 950   for (j = 0; j < nbbs; j++)
 951     {
 952       basic_block bb = bbs[j];
 953       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 954         free_stmt_vec_info (gsi_stmt (si));
 955
 956       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 957         {
 958           gimple stmt = gsi_stmt (si);
 959
 960           /* We may have broken canonical form by moving a constant
 961              into RHS1 of a commutative op.  Fix such occurrences.  */
 962           if (swapped && is_gimple_assign (stmt))
 963             {
 964               enum tree_code code = gimple_assign_rhs_code (stmt);
 965
 966               if ((code == PLUS_EXPR
 967                    || code == POINTER_PLUS_EXPR
 968                    || code == MULT_EXPR)
 969                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
 970                 swap_tree_operands (stmt,
 971                                     gimple_assign_rhs1_ptr (stmt),
 972                                     gimple_assign_rhs2_ptr (stmt));
 973             }
 974
 975           /* Free stmt_vec_info.  */
 976           free_stmt_vec_info (stmt);
 977           gsi_next (&si);
 978         }
 979     }
 980
 981   free (LOOP_VINFO_BBS (loop_vinfo));
 982   vect_destroy_datarefs (loop_vinfo, NULL);
 983   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 984   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
 985   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
 986   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
 987   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 988   FOR_EACH_VEC_ELT (slp_instances, j, instance)
 989     vect_free_slp_instance (instance);
 990
 991   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
 992   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
 993   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
 994   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
 995
 996   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
 997     LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();
 998
 999   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1000
1001   free (loop_vinfo);
1002   loop->aux = NULL;
1003 }
1004
1005
1006 /* Function vect_analyze_loop_1.
1007
1008    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1009    for it. The different analyses will record information in the
1010    loop_vec_info struct.  This is a subset of the analyses applied in
1011    vect_analyze_loop, to be applied on an inner-loop nested in the loop
1012    that is now considered for (outer-loop) vectorization.  */
1013
1014 static loop_vec_info
1015 vect_analyze_loop_1 (struct loop *loop)
1016 {
1017   loop_vec_info loop_vinfo;
1018
1019   if (dump_enabled_p ())
1020     dump_printf_loc (MSG_NOTE, vect_location,
1021                      "===== analyze_loop_nest_1 =====\n");
1022
1023   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1024
1025   loop_vinfo = vect_analyze_loop_form (loop);
1026   if (!loop_vinfo)
1027     {
1028       if (dump_enabled_p ())
1029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1030                          "bad inner-loop form.\n");
1031       return NULL;
1032     }
1033
1034   return loop_vinfo;
1035 }
1036
1037
1038 /* Function vect_analyze_loop_form.
1039
1040    Verify that certain CFG restrictions hold, including:
1041    - the loop has a pre-header
1042    - the loop has a single entry and exit
1043    - the loop exit condition is simple enough, and the number of iterations
1044      can be analyzed (a countable loop).  */
1045
1046 loop_vec_info
1047 vect_analyze_loop_form (struct loop *loop)
1048 {
1049   loop_vec_info loop_vinfo;
1050   gimple loop_cond;
1051   tree number_of_iterations = NULL;
1052   loop_vec_info inner_loop_vinfo = NULL;
1053
1054   if (dump_enabled_p ())
1055     dump_printf_loc (MSG_NOTE, vect_location,
1056                      "=== vect_analyze_loop_form ===\n");
1057
1058   /* Different restrictions apply when we are considering an inner-most loop,
1059      vs. an outer (nested) loop.
1060      (FORNOW. May want to relax some of these restrictions in the future).  */
1061
1062   if (!loop->inner)
1063     {
1064       /* Inner-most loop.  We currently require that the number of BBs is
1065          exactly 2 (the header and latch).  Vectorizable inner-most loops
1066          look like this:
1067
1068                         (pre-header)
1069                            |
1070                           header <--------+
1071                            | |            |
1072                            | +--> latch --+
1073                            |
1074                         (exit-bb)  */
1075
1076       if (loop->num_nodes != 2)
1077         {
1078           if (dump_enabled_p ())
1079             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1080                              "not vectorized: control flow in loop.\n");
1081           return NULL;
1082         }
1083
1084       if (empty_block_p (loop->header))
1085     {
1086           if (dump_enabled_p ())
1087             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1088                              "not vectorized: empty loop.\n");
1089       return NULL;
1090     }
1091     }
1092   else
1093     {
1094       struct loop *innerloop = loop->inner;
1095       edge entryedge;
1096
1097       /* Nested loop. We currently require that the loop is doubly-nested,
1098          contains a single inner loop, and the number of BBs is exactly 5.
1099          Vectorizable outer-loops look like this:
1100
1101                         (pre-header)
1102                            |
1103                           header <---+
1104                            |         |
1105                           inner-loop |
1106                            |         |
1107                           tail ------+
1108                            |
1109                         (exit-bb)
1110
1111          The inner-loop has the properties expected of inner-most loops
1112          as described above.  */
1113
1114       if ((loop->inner)->inner || (loop->inner)->next)
1115         {
1116           if (dump_enabled_p ())
1117             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1118                              "not vectorized: multiple nested loops.\n");
1119           return NULL;
1120         }
1121
1122       /* Analyze the inner-loop.  */
1123       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1124       if (!inner_loop_vinfo)
1125         {
1126           if (dump_enabled_p ())
1127             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1128                              "not vectorized: Bad inner loop.\n");
1129           return NULL;
1130         }
1131
1132       if (!expr_invariant_in_loop_p (loop,
1133                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1134         {
1135           if (dump_enabled_p ())
1136             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137                              "not vectorized: inner-loop count not"
1138                              " invariant.\n");
1139           destroy_loop_vec_info (inner_loop_vinfo, true);
1140           return NULL;
1141         }
1142
1143       if (loop->num_nodes != 5)
1144         {
1145           if (dump_enabled_p ())
1146             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1147                              "not vectorized: control flow in loop.\n");
1148           destroy_loop_vec_info (inner_loop_vinfo, true);
1149           return NULL;
1150         }
1151
1152       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1153       entryedge = EDGE_PRED (innerloop->header, 0);
1154       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1155         entryedge = EDGE_PRED (innerloop->header, 1);
1156
1157       if (entryedge->src != loop->header
1158           || !single_exit (innerloop)
1159           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1160         {
1161           if (dump_enabled_p ())
1162             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1163                              "not vectorized: unsupported outerloop form.\n");
1164           destroy_loop_vec_info (inner_loop_vinfo, true);
1165           return NULL;
1166         }
1167
1168       if (dump_enabled_p ())
1169         dump_printf_loc (MSG_NOTE, vect_location,
1170                          "Considering outer-loop vectorization.\n");
1171     }
1172
1173   if (!single_exit (loop)
1174       || EDGE_COUNT (loop->header->preds) != 2)
1175     {
1176       if (dump_enabled_p ())
1177         {
1178           if (!single_exit (loop))
1179             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1180                              "not vectorized: multiple exits.\n");
1181           else if (EDGE_COUNT (loop->header->preds) != 2)
1182             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1183                              "not vectorized: too many incoming edges.\n");
1184         }
1185       if (inner_loop_vinfo)
1186         destroy_loop_vec_info (inner_loop_vinfo, true);
1187       return NULL;
1188     }
1189
1190   /* We assume that the loop exit condition is at the end of the loop. i.e,
1191      that the loop is represented as a do-while (with a proper if-guard
1192      before the loop if needed), where the loop header contains all the
1193      executable statements, and the latch is empty.  */
1194   if (!empty_block_p (loop->latch)
1195       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1196     {
1197       if (dump_enabled_p ())
1198         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1199                          "not vectorized: latch block not empty.\n");
1200       if (inner_loop_vinfo)
1201         destroy_loop_vec_info (inner_loop_vinfo, true);
1202       return NULL;
1203     }
1204
1205   /* Make sure there exists a single-predecessor exit bb:  */
1206   if (!single_pred_p (single_exit (loop)->dest))
1207     {
1208       edge e = single_exit (loop);
1209       if (!(e->flags & EDGE_ABNORMAL))
1210         {
1211           split_loop_exit_edge (e);
1212           if (dump_enabled_p ())
1213             dump_printf (MSG_NOTE, "split exit edge.\n");
1214         }
1215       else
1216         {
1217           if (dump_enabled_p ())
1218             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1219                              "not vectorized: abnormal loop exit edge.\n");
1220           if (inner_loop_vinfo)
1221             destroy_loop_vec_info (inner_loop_vinfo, true);
1222           return NULL;
1223         }
1224     }
1225
1226   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1227   if (!loop_cond)
1228     {
1229       if (dump_enabled_p ())
1230         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1231                          "not vectorized: complicated exit condition.\n");
1232       if (inner_loop_vinfo)
1233         destroy_loop_vec_info (inner_loop_vinfo, true);
1234       return NULL;
1235     }
1236
1237   if (!number_of_iterations)
1238     {
1239       if (dump_enabled_p ())
1240         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1241                          "not vectorized: number of iterations cannot be "
1242                          "computed.\n");
1243       if (inner_loop_vinfo)
1244         destroy_loop_vec_info (inner_loop_vinfo, true);
1245       return NULL;
1246     }
1247
1248   if (chrec_contains_undetermined (number_of_iterations))
1249     {
1250       if (dump_enabled_p ())
1251             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1252                              "Infinite number of iterations.\n");
1253       if (inner_loop_vinfo)
1254         destroy_loop_vec_info (inner_loop_vinfo, true);
1255       return NULL;
1256     }
1257
1258   if (!NITERS_KNOWN_P (number_of_iterations))
1259     {
1260       if (dump_enabled_p ())
1261         {
1262           dump_printf_loc (MSG_NOTE, vect_location,
1263                            "Symbolic number of iterations is ");
1264           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1265           dump_printf (MSG_NOTE, "\n");
1266         }
1267     }
1268   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1269     {
1270       if (dump_enabled_p ())
1271         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1272                          "not vectorized: number of iterations = 0.\n");
1273       if (inner_loop_vinfo)
1274         destroy_loop_vec_info (inner_loop_vinfo, true);
1275       return NULL;
1276     }
1277
1278   loop_vinfo = new_loop_vec_info (loop);
1279   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1280   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1281
1282   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1283
1284   /* CHECKME: May want to keep it around it in the future.  */
1285   if (inner_loop_vinfo)
1286     destroy_loop_vec_info (inner_loop_vinfo, false);
1287
1288   gcc_assert (!loop->aux);
1289   loop->aux = loop_vinfo;
1290   return loop_vinfo;
1291 }
1292
1293
1294 /* Function vect_analyze_loop_operations.
1295
1296    Scan the loop stmts and make sure they are all vectorizable.  */
1297
1298 static bool
1299 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1300 {
1301   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1302   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1303   int nbbs = loop->num_nodes;
1304   gimple_stmt_iterator si;
1305   unsigned int vectorization_factor = 0;
1306   int i;
1307   gimple phi;
1308   stmt_vec_info stmt_info;
1309   bool need_to_vectorize = false;
1310   int min_profitable_iters;
1311   int min_scalar_loop_bound;
1312   unsigned int th;
1313   bool only_slp_in_loop = true, ok;
1314   HOST_WIDE_INT max_niter;
1315   HOST_WIDE_INT estimated_niter;
1316   int min_profitable_estimate;
1317
1318   if (dump_enabled_p ())
1319     dump_printf_loc (MSG_NOTE, vect_location,
1320                      "=== vect_analyze_loop_operations ===\n");
1321
1322   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1323   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1324   if (slp)
1325     {
1326       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1327          vectorization factor of the loop is the unrolling factor required by
1328          the SLP instances.  If that unrolling factor is 1, we say, that we
1329          perform pure SLP on loop - cross iteration parallelism is not
1330          exploited.  */
1331       for (i = 0; i < nbbs; i++)
1332         {
1333           basic_block bb = bbs[i];
1334           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1335             {
1336               gimple stmt = gsi_stmt (si);
1337               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1338               gcc_assert (stmt_info);
1339               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1340                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1341                   && !PURE_SLP_STMT (stmt_info))
1342                 /* STMT needs both SLP and loop-based vectorization.  */
1343                 only_slp_in_loop = false;
1344             }
1345         }
1346
1347       if (only_slp_in_loop)
1348         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1349       else
1350         vectorization_factor = least_common_multiple (vectorization_factor,
1351                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1352
1353       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1354       if (dump_enabled_p ())
1355         dump_printf_loc (MSG_NOTE, vect_location,
1356                          "Updating vectorization factor to %d\n",
1357                          vectorization_factor);
1358     }
1359
1360   for (i = 0; i < nbbs; i++)
1361     {
1362       basic_block bb = bbs[i];
1363
1364       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1365         {
1366           phi = gsi_stmt (si);
1367           ok = true;
1368
1369           stmt_info = vinfo_for_stmt (phi);
1370           if (dump_enabled_p ())
1371             {
1372               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1373               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1374               dump_printf (MSG_NOTE, "\n");
1375             }
1376
1377           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1378              (i.e., a phi in the tail of the outer-loop).  */
1379           if (! is_loop_header_bb_p (bb))
1380             {
1381               /* FORNOW: we currently don't support the case that these phis
1382                  are not used in the outerloop (unless it is double reduction,
1383                  i.e., this phi is vect_reduction_def), cause this case
1384                  requires to actually do something here.  */
1385               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1386                    || STMT_VINFO_LIVE_P (stmt_info))
1387                   && STMT_VINFO_DEF_TYPE (stmt_info)
1388                      != vect_double_reduction_def)
1389                 {
1390                   if (dump_enabled_p ())
1391                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                                      "Unsupported loop-closed phi in "
1393                                      "outer-loop.\n");
1394                   return false;
1395                 }
1396
1397               /* If PHI is used in the outer loop, we check that its operand
1398                  is defined in the inner loop.  */
1399               if (STMT_VINFO_RELEVANT_P (stmt_info))
1400                 {
1401                   tree phi_op;
1402                   gimple op_def_stmt;
1403
1404                   if (gimple_phi_num_args (phi) != 1)
1405                     return false;
1406
1407                   phi_op = PHI_ARG_DEF (phi, 0);
1408                   if (TREE_CODE (phi_op) != SSA_NAME)
1409                     return false;
1410
1411                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1412                   if (gimple_nop_p (op_def_stmt)
1413                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1414                       || !vinfo_for_stmt (op_def_stmt))
1415                     return false;
1416
1417                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1418                         != vect_used_in_outer
1419                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1420                            != vect_used_in_outer_by_reduction)
1421                     return false;
1422                 }
1423
1424               continue;
1425             }
1426
1427           gcc_assert (stmt_info);
1428
1429           if (STMT_VINFO_LIVE_P (stmt_info))
1430             {
1431               /* FORNOW: not yet supported.  */
1432               if (dump_enabled_p ())
1433                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1434                                  "not vectorized: value used after loop.\n");
1435               return false;
1436             }
1437
1438           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1439               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1440             {
1441               /* A scalar-dependence cycle that we don't support.  */
1442               if (dump_enabled_p ())
1443                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1444                                  "not vectorized: scalar dependence cycle.\n");
1445               return false;
1446             }
1447
1448           if (STMT_VINFO_RELEVANT_P (stmt_info))
1449             {
1450               need_to_vectorize = true;
1451               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1452                 ok = vectorizable_induction (phi, NULL, NULL);
1453             }
1454
1455           if (!ok)
1456             {
1457               if (dump_enabled_p ())
1458                 {
1459                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1460                                    "not vectorized: relevant phi not "
1461                                    "supported: ");
1462                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1463                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1464                 }
1465               return false;
1466             }
1467         }
1468
1469       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1470         {
1471           gimple stmt = gsi_stmt (si);
1472           if (!gimple_clobber_p (stmt)
1473               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1474             return false;
1475         }
1476     } /* bbs */
1477
1478   /* All operations in the loop are either irrelevant (deal with loop
1479      control, or dead), or only used outside the loop and can be moved
1480      out of the loop (e.g. invariants, inductions).  The loop can be
1481      optimized away by scalar optimizations.  We're better off not
1482      touching this loop.  */
1483   if (!need_to_vectorize)
1484     {
1485       if (dump_enabled_p ())
1486         dump_printf_loc (MSG_NOTE, vect_location,
1487                          "All the computation can be taken out of the loop.\n");
1488       if (dump_enabled_p ())
1489         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1490                          "not vectorized: redundant loop. no profit to "
1491                          "vectorize.\n");
1492       return false;
1493     }
1494
1495   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1496     dump_printf_loc (MSG_NOTE, vect_location,
1497                      "vectorization_factor = %d, niters = "
1498                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1499                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1500
1501   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1502        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1503       || ((max_niter = max_stmt_executions_int (loop)) != -1
1504           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1505     {
1506       if (dump_enabled_p ())
1507         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1508                          "not vectorized: iteration count too small.\n");
1509       if (dump_enabled_p ())
1510         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1511                          "not vectorized: iteration count smaller than "
1512                          "vectorization factor.\n");
1513       return false;
1514     }
1515
1516   /* Analyze cost.  Decide if worth while to vectorize.  */
1517
1518   /* Once VF is set, SLP costs should be updated since the number of created
1519      vector stmts depends on VF.  */
1520   vect_update_slp_costs_according_to_vf (loop_vinfo);
1521
1522   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1523                                       &min_profitable_estimate);
1524   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1525
1526   if (min_profitable_iters < 0)
1527     {
1528       if (dump_enabled_p ())
1529         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1530                          "not vectorized: vectorization not profitable.\n");
1531       if (dump_enabled_p ())
1532         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533                          "not vectorized: vector version will never be "
1534                          "profitable.\n");
1535       return false;
1536     }
1537
1538   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1539                             * vectorization_factor) - 1);
1540
1541
1542   /* Use the cost model only if it is more conservative than user specified
1543      threshold.  */
1544
1545   th = (unsigned) min_scalar_loop_bound;
1546   if (min_profitable_iters
1547       && (!min_scalar_loop_bound
1548           || min_profitable_iters > min_scalar_loop_bound))
1549     th = (unsigned) min_profitable_iters;
1550
1551   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1552       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1553     {
1554       if (dump_enabled_p ())
1555         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1556                          "not vectorized: vectorization not profitable.\n");
1557       if (dump_enabled_p ())
1558         dump_printf_loc (MSG_NOTE, vect_location,
1559                          "not vectorized: iteration count smaller than user "
1560                          "specified loop bound parameter or minimum profitable "
1561                          "iterations (whichever is more conservative).\n");
1562       return false;
1563     }
1564
1565   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1566       && ((unsigned HOST_WIDE_INT) estimated_niter
1567           <= MAX (th, (unsigned)min_profitable_estimate)))
1568     {
1569       if (dump_enabled_p ())
1570         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1571                          "not vectorized: estimated iteration count too "
1572                          "small.\n");
1573       if (dump_enabled_p ())
1574         dump_printf_loc (MSG_NOTE, vect_location,
1575                          "not vectorized: estimated iteration count smaller "
1576                          "than specified loop bound parameter or minimum "
1577                          "profitable iterations (whichever is more "
1578                          "conservative).\n");
1579       return false;
1580     }
1581
1582   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1583       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1584       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1585     {
1586       if (dump_enabled_p ())
1587         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
1588       if (!vect_can_advance_ivs_p (loop_vinfo))
1589         {
1590           if (dump_enabled_p ())
1591             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1592                              "not vectorized: can't create epilog loop 1.\n");
1593           return false;
1594         }
1595       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1596         {
1597           if (dump_enabled_p ())
1598             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599                              "not vectorized: can't create epilog loop 2.\n");
1600           return false;
1601         }
1602     }
1603
1604   return true;
1605 }
1606
1607
1608 /* Function vect_analyze_loop_2.
1609
1610    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1611    for it.  The different analyses will record information in the
1612    loop_vec_info struct.  */
1613 static bool
1614 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1615 {
1616   bool ok, slp = false;
1617   int max_vf = MAX_VECTORIZATION_FACTOR;
1618   int min_vf = 2;
1619
1620   /* Find all data references in the loop (which correspond to vdefs/vuses)
1621      and analyze their evolution in the loop.  Also adjust the minimal
1622      vectorization factor according to the loads and stores.
1623
1624      FORNOW: Handle only simple, array references, which
1625      alignment can be forced, and aligned pointer-references.  */
1626
1627   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1628   if (!ok)
1629     {
1630       if (dump_enabled_p ())
1631         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1632                          "bad data references.\n");
1633       return false;
1634     }
1635
1636   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1637      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1638
1639   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1640   if (!ok)
1641     {
1642       if (dump_enabled_p ())
1643         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1644                          "bad data access.\n");
1645       return false;
1646     }
1647
1648   /* Classify all cross-iteration scalar data-flow cycles.
1649      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1650
1651   vect_analyze_scalar_cycles (loop_vinfo);
1652
1653   vect_pattern_recog (loop_vinfo, NULL);
1654
1655   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1656
1657   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1658   if (!ok)
1659     {
1660       if (dump_enabled_p ())
1661         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1662                          "unexpected pattern.\n");
1663       return false;
1664     }
1665
1666   /* Analyze data dependences between the data-refs in the loop
1667      and adjust the maximum vectorization factor according to
1668      the dependences.
1669      FORNOW: fail at the first data dependence that we encounter.  */
1670
1671   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1672   if (!ok
1673       || max_vf < min_vf)
1674     {
1675       if (dump_enabled_p ())
1676             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1677                              "bad data dependence.\n");
1678       return false;
1679     }
1680
1681   ok = vect_determine_vectorization_factor (loop_vinfo);
1682   if (!ok)
1683     {
1684       if (dump_enabled_p ())
1685         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686                          "can't determine vectorization factor.\n");
1687       return false;
1688     }
1689   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1690     {
1691       if (dump_enabled_p ())
1692         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1693                          "bad data dependence.\n");
1694       return false;
1695     }
1696
1697   /* Analyze the alignment of the data-refs in the loop.
1698      Fail if a data reference is found that cannot be vectorized.  */
1699
1700   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1701   if (!ok)
1702     {
1703       if (dump_enabled_p ())
1704         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1705                          "bad data alignment.\n");
1706       return false;
1707     }
1708
1709   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1710      It is important to call pruning after vect_analyze_data_ref_accesses,
1711      since we use grouping information gathered by interleaving analysis.  */
1712   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1713   if (!ok)
1714     {
1715       if (dump_enabled_p ())
1716         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1717                          "too long list of versioning for alias "
1718                          "run-time tests.\n");
1719       return false;
1720     }
1721
1722   /* This pass will decide on using loop versioning and/or loop peeling in
1723      order to enhance the alignment of data references in the loop.  */
1724
1725   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1726   if (!ok)
1727     {
1728       if (dump_enabled_p ())
1729         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1730                          "bad data alignment.\n");
1731       return false;
1732     }
1733
1734   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1735   ok = vect_analyze_slp (loop_vinfo, NULL);
1736   if (ok)
1737     {
1738       /* Decide which possible SLP instances to SLP.  */
1739       slp = vect_make_slp_decision (loop_vinfo);
1740
1741       /* Find stmts that need to be both vectorized and SLPed.  */
1742       vect_detect_hybrid_slp (loop_vinfo);
1743     }
1744   else
1745     return false;
1746
1747   /* Scan all the operations in the loop and make sure they are
1748      vectorizable.  */
1749
1750   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1751   if (!ok)
1752     {
1753       if (dump_enabled_p ())
1754         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1755                          "bad operation or unsupported loop bound.\n");
1756       return false;
1757     }
1758
1759   return true;
1760 }
1761
1762 /* Function vect_analyze_loop.
1763
1764    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1765    for it.  The different analyses will record information in the
1766    loop_vec_info struct.  */
1767 loop_vec_info
1768 vect_analyze_loop (struct loop *loop)
1769 {
1770   loop_vec_info loop_vinfo;
1771   unsigned int vector_sizes;
1772
1773   /* Autodetect first vector size we try.  */
1774   current_vector_size = 0;
1775   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1776
1777   if (dump_enabled_p ())
1778     dump_printf_loc (MSG_NOTE, vect_location,
1779                      "===== analyze_loop_nest =====\n");
1780
1781   if (loop_outer (loop)
1782       && loop_vec_info_for_loop (loop_outer (loop))
1783       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1784     {
1785       if (dump_enabled_p ())
1786         dump_printf_loc (MSG_NOTE, vect_location,
1787                          "outer-loop already vectorized.\n");
1788       return NULL;
1789     }
1790
1791   while (1)
1792     {
1793       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1794       loop_vinfo = vect_analyze_loop_form (loop);
1795       if (!loop_vinfo)
1796         {
1797           if (dump_enabled_p ())
1798             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1799                              "bad loop form.\n");
1800           return NULL;
1801         }
1802
1803       if (vect_analyze_loop_2 (loop_vinfo))
1804         {
1805           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1806
1807           return loop_vinfo;
1808         }
1809
1810       destroy_loop_vec_info (loop_vinfo, true);
1811
1812       vector_sizes &= ~current_vector_size;
1813       if (vector_sizes == 0
1814           || current_vector_size == 0)
1815         return NULL;
1816
1817       /* Try the next biggest vector size.  */
1818       current_vector_size = 1 << floor_log2 (vector_sizes);
1819       if (dump_enabled_p ())
1820         dump_printf_loc (MSG_NOTE, vect_location,
1821                          "***** Re-trying analysis with "
1822                          "vector size %d\n", current_vector_size);
1823     }
1824 }
1825
1826
1827 /* Function reduction_code_for_scalar_code
1828
1829    Input:
1830    CODE - tree_code of a reduction operations.
1831
1832    Output:
1833    REDUC_CODE - the corresponding tree-code to be used to reduce the
1834       vector of partial results into a single scalar result (which
1835       will also reside in a vector) or ERROR_MARK if the operation is
1836       a supported reduction operation, but does not have such tree-code.
1837
1838    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1839
1840 static bool
1841 reduction_code_for_scalar_code (enum tree_code code,
1842                                 enum tree_code *reduc_code)
1843 {
1844   switch (code)
1845     {
1846       case MAX_EXPR:
1847         *reduc_code = REDUC_MAX_EXPR;
1848         return true;
1849
1850       case MIN_EXPR:
1851         *reduc_code = REDUC_MIN_EXPR;
1852         return true;
1853
1854       case PLUS_EXPR:
1855         *reduc_code = REDUC_PLUS_EXPR;
1856         return true;
1857
1858       case MULT_EXPR:
1859       case MINUS_EXPR:
1860       case BIT_IOR_EXPR:
1861       case BIT_XOR_EXPR:
1862       case BIT_AND_EXPR:
1863         *reduc_code = ERROR_MARK;
1864         return true;
1865
1866       default:
1867        return false;
1868     }
1869 }
1870
1871
1872 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1873    STMT is printed with a message MSG. */
1874
1875 static void
1876 report_vect_op (int msg_type, gimple stmt, const char *msg)
1877 {
1878   dump_printf_loc (msg_type, vect_location, "%s", msg);
1879   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1880   dump_printf (msg_type, "\n");
1881 }
1882
1883
1884 /* Detect SLP reduction of the form:
1885
1886    #a1 = phi <a5, a0>
1887    a2 = operation (a1)
1888    a3 = operation (a2)
1889    a4 = operation (a3)
1890    a5 = operation (a4)
1891
1892    #a = phi <a5>
1893
1894    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1895    FIRST_STMT is the first reduction stmt in the chain
1896    (a2 = operation (a1)).
1897
1898    Return TRUE if a reduction chain was detected.  */
1899
1900 static bool
1901 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1902 {
1903   struct loop *loop = (gimple_bb (phi))->loop_father;
1904   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1905   enum tree_code code;
1906   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1907   stmt_vec_info use_stmt_info, current_stmt_info;
1908   tree lhs;
1909   imm_use_iterator imm_iter;
1910   use_operand_p use_p;
1911   int nloop_uses, size = 0, n_out_of_loop_uses;
1912   bool found = false;
1913
1914   if (loop != vect_loop)
1915     return false;
1916
1917   lhs = PHI_RESULT (phi);
1918   code = gimple_assign_rhs_code (first_stmt);
1919   while (1)
1920     {
1921       nloop_uses = 0;
1922       n_out_of_loop_uses = 0;
1923       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1924         {
1925           gimple use_stmt = USE_STMT (use_p);
1926           if (is_gimple_debug (use_stmt))
1927             continue;
1928
1929           use_stmt = USE_STMT (use_p);
1930
1931           /* Check if we got back to the reduction phi.  */
1932           if (use_stmt == phi)
1933             {
1934               loop_use_stmt = use_stmt;
1935               found = true;
1936               break;
1937             }
1938
1939           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1940             {
1941               if (vinfo_for_stmt (use_stmt)
1942                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1943                 {
1944                   loop_use_stmt = use_stmt;
1945                   nloop_uses++;
1946                 }
1947             }
1948            else
1949              n_out_of_loop_uses++;
1950
1951            /* There are can be either a single use in the loop or two uses in
1952               phi nodes.  */
1953            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1954              return false;
1955         }
1956
1957       if (found)
1958         break;
1959
1960       /* We reached a statement with no loop uses.  */
1961       if (nloop_uses == 0)
1962         return false;
1963
1964       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1965       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1966         return false;
1967
1968       if (!is_gimple_assign (loop_use_stmt)
1969           || code != gimple_assign_rhs_code (loop_use_stmt)
1970           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1971         return false;
1972
1973       /* Insert USE_STMT into reduction chain.  */
1974       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1975       if (current_stmt)
1976         {
1977           current_stmt_info = vinfo_for_stmt (current_stmt);
1978           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1979           GROUP_FIRST_ELEMENT (use_stmt_info)
1980             = GROUP_FIRST_ELEMENT (current_stmt_info);
1981         }
1982       else
1983         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1984
1985       lhs = gimple_assign_lhs (loop_use_stmt);
1986       current_stmt = loop_use_stmt;
1987       size++;
1988    }
1989
1990   if (!found || loop_use_stmt != phi || size < 2)
1991     return false;
1992
1993   /* Swap the operands, if needed, to make the reduction operand be the second
1994      operand.  */
1995   lhs = PHI_RESULT (phi);
1996   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1997   while (next_stmt)
1998     {
1999       if (gimple_assign_rhs2 (next_stmt) == lhs)
2000         {
2001           tree op = gimple_assign_rhs1 (next_stmt);
2002           gimple def_stmt = NULL;
2003
2004           if (TREE_CODE (op) == SSA_NAME)
2005             def_stmt = SSA_NAME_DEF_STMT (op);
2006
2007           /* Check that the other def is either defined in the loop
2008              ("vect_internal_def"), or it's an induction (defined by a
2009              loop-header phi-node).  */
2010           if (def_stmt
2011               && gimple_bb (def_stmt)
2012               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2013               && (is_gimple_assign (def_stmt)
2014                   || is_gimple_call (def_stmt)
2015                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2016                            == vect_induction_def
2017                   || (gimple_code (def_stmt) == GIMPLE_PHI
2018                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2019                                   == vect_internal_def
2020                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2021             {
2022               lhs = gimple_assign_lhs (next_stmt);
2023               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2024               continue;
2025             }
2026
2027           return false;
2028         }
2029       else
2030         {
2031           tree op = gimple_assign_rhs2 (next_stmt);
2032           gimple def_stmt = NULL;
2033
2034           if (TREE_CODE (op) == SSA_NAME)
2035             def_stmt = SSA_NAME_DEF_STMT (op);
2036
2037           /* Check that the other def is either defined in the loop
2038             ("vect_internal_def"), or it's an induction (defined by a
2039             loop-header phi-node).  */
2040           if (def_stmt
2041               && gimple_bb (def_stmt)
2042               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2043               && (is_gimple_assign (def_stmt)
2044                   || is_gimple_call (def_stmt)
2045                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2046                               == vect_induction_def
2047                   || (gimple_code (def_stmt) == GIMPLE_PHI
2048                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2049                                   == vect_internal_def
2050                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2051             {
2052               if (dump_enabled_p ())
2053                 {
2054                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2055                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2056                   dump_printf (MSG_NOTE, "\n");
2057                 }
2058
2059               swap_tree_operands (next_stmt,
2060                                   gimple_assign_rhs1_ptr (next_stmt),
2061                                   gimple_assign_rhs2_ptr (next_stmt));
2062               update_stmt (next_stmt);
2063
2064               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2065                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2066             }
2067           else
2068             return false;
2069         }
2070
2071       lhs = gimple_assign_lhs (next_stmt);
2072       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2073     }
2074
2075   /* Save the chain for further analysis in SLP detection.  */
2076   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2077   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2078   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2079
2080   return true;
2081 }
2082
2083
2084 /* Function vect_is_simple_reduction_1
2085
2086    (1) Detect a cross-iteration def-use cycle that represents a simple
2087    reduction computation.  We look for the following pattern:
2088
2089    loop_header:
2090      a1 = phi < a0, a2 >
2091      a3 = ...
2092      a2 = operation (a3, a1)
2093
2094    such that:
2095    1. operation is commutative and associative and it is safe to
2096       change the order of the computation (if CHECK_REDUCTION is true)
2097    2. no uses for a2 in the loop (a2 is used out of the loop)
2098    3. no uses of a1 in the loop besides the reduction operation
2099    4. no uses of a1 outside the loop.
2100
2101    Conditions 1,4 are tested here.
2102    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2103
2104    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2105    nested cycles, if CHECK_REDUCTION is false.
2106
2107    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2108    reductions:
2109
2110      a1 = phi < a0, a2 >
2111      inner loop (def of a3)
2112      a2 = phi < a3 >
2113
2114    If MODIFY is true it tries also to rework the code in-place to enable
2115    detection of more reduction patterns.  For the time being we rewrite
2116    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
2117 */
2118
2119 static gimple
2120 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2121                             bool check_reduction, bool *double_reduc,
2122                             bool modify)
2123 {
2124   struct loop *loop = (gimple_bb (phi))->loop_father;
2125   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2126   edge latch_e = loop_latch_edge (loop);
2127   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2128   gimple def_stmt, def1 = NULL, def2 = NULL;
2129   enum tree_code orig_code, code;
2130   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2131   tree type;
2132   int nloop_uses;
2133   tree name;
2134   imm_use_iterator imm_iter;
2135   use_operand_p use_p;
2136   bool phi_def;
2137
2138   *double_reduc = false;
2139
2140   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2141      otherwise, we assume outer loop vectorization.  */
2142   gcc_assert ((check_reduction && loop == vect_loop)
2143               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2144
2145   name = PHI_RESULT (phi);
2146   nloop_uses = 0;
2147   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2148     {
2149       gimple use_stmt = USE_STMT (use_p);
2150       if (is_gimple_debug (use_stmt))
2151         continue;
2152
2153       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2154         {
2155           if (dump_enabled_p ())
2156             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157                              "intermediate value used outside loop.\n");
2158
2159           return NULL;
2160         }
2161
2162       if (vinfo_for_stmt (use_stmt)
2163           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2164         nloop_uses++;
2165       if (nloop_uses > 1)
2166         {
2167           if (dump_enabled_p ())
2168             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2169                              "reduction used in loop.\n");
2170           return NULL;
2171         }
2172     }
2173
2174   if (TREE_CODE (loop_arg) != SSA_NAME)
2175     {
2176       if (dump_enabled_p ())
2177         {
2178           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2179                            "reduction: not ssa_name: ");
2180           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2181           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2182         }
2183       return NULL;
2184     }
2185
2186   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2187   if (!def_stmt)
2188     {
2189       if (dump_enabled_p ())
2190         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2191                          "reduction: no def_stmt.\n");
2192       return NULL;
2193     }
2194
2195   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2196     {
2197       if (dump_enabled_p ())
2198         {
2199           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2200           dump_printf (MSG_NOTE, "\n");
2201         }
2202       return NULL;
2203     }
2204
2205   if (is_gimple_assign (def_stmt))
2206     {
2207       name = gimple_assign_lhs (def_stmt);
2208       phi_def = false;
2209     }
2210   else
2211     {
2212       name = PHI_RESULT (def_stmt);
2213       phi_def = true;
2214     }
2215
2216   nloop_uses = 0;
2217   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2218     {
2219       gimple use_stmt = USE_STMT (use_p);
2220       if (is_gimple_debug (use_stmt))
2221         continue;
2222       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2223           && vinfo_for_stmt (use_stmt)
2224           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2225         nloop_uses++;
2226       if (nloop_uses > 1)
2227         {
2228           if (dump_enabled_p ())
2229             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2230                              "reduction used in loop.\n");
2231           return NULL;
2232         }
2233     }
2234
2235   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2236      defined in the inner loop.  */
2237   if (phi_def)
2238     {
2239       op1 = PHI_ARG_DEF (def_stmt, 0);
2240
2241       if (gimple_phi_num_args (def_stmt) != 1
2242           || TREE_CODE (op1) != SSA_NAME)
2243         {
2244           if (dump_enabled_p ())
2245             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246                              "unsupported phi node definition.\n");
2247
2248           return NULL;
2249         }
2250
2251       def1 = SSA_NAME_DEF_STMT (op1);
2252       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2253           && loop->inner
2254           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2255           && is_gimple_assign (def1))
2256         {
2257           if (dump_enabled_p ())
2258             report_vect_op (MSG_NOTE, def_stmt,
2259                             "detected double reduction: ");
2260
2261           *double_reduc = true;
2262           return def_stmt;
2263         }
2264
2265       return NULL;
2266     }
2267
2268   code = orig_code = gimple_assign_rhs_code (def_stmt);
2269
2270   /* We can handle "res -= x[i]", which is non-associative by
2271      simply rewriting this into "res += -x[i]".  Avoid changing
2272      gimple instruction for the first simple tests and only do this
2273      if we're allowed to change code at all.  */
2274   if (code == MINUS_EXPR
2275       && modify
2276       && (op1 = gimple_assign_rhs1 (def_stmt))
2277       && TREE_CODE (op1) == SSA_NAME
2278       && SSA_NAME_DEF_STMT (op1) == phi)
2279     code = PLUS_EXPR;
2280
2281   if (check_reduction
2282       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2283     {
2284       if (dump_enabled_p ())
2285         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2286                         "reduction: not commutative/associative: ");
2287       return NULL;
2288     }
2289
2290   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2291     {
2292       if (code != COND_EXPR)
2293         {
2294           if (dump_enabled_p ())
2295             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2296                             "reduction: not binary operation: ");
2297
2298           return NULL;
2299         }
2300
2301       op3 = gimple_assign_rhs1 (def_stmt);
2302       if (COMPARISON_CLASS_P (op3))
2303         {
2304           op4 = TREE_OPERAND (op3, 1);
2305           op3 = TREE_OPERAND (op3, 0);
2306         }
2307
2308       op1 = gimple_assign_rhs2 (def_stmt);
2309       op2 = gimple_assign_rhs3 (def_stmt);
2310
2311       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2312         {
2313           if (dump_enabled_p ())
2314             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2315                             "reduction: uses not ssa_names: ");
2316
2317           return NULL;
2318         }
2319     }
2320   else
2321     {
2322       op1 = gimple_assign_rhs1 (def_stmt);
2323       op2 = gimple_assign_rhs2 (def_stmt);
2324
2325       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2326         {
2327           if (dump_enabled_p ())
2328             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2329                             "reduction: uses not ssa_names: ");
2330
2331           return NULL;
2332         }
2333    }
2334
2335   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2336   if ((TREE_CODE (op1) == SSA_NAME
2337        && !types_compatible_p (type,TREE_TYPE (op1)))
2338       || (TREE_CODE (op2) == SSA_NAME
2339           && !types_compatible_p (type, TREE_TYPE (op2)))
2340       || (op3 && TREE_CODE (op3) == SSA_NAME
2341           && !types_compatible_p (type, TREE_TYPE (op3)))
2342       || (op4 && TREE_CODE (op4) == SSA_NAME
2343           && !types_compatible_p (type, TREE_TYPE (op4))))
2344     {
2345       if (dump_enabled_p ())
2346         {
2347           dump_printf_loc (MSG_NOTE, vect_location,
2348                            "reduction: multiple types: operation type: ");
2349           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2350           dump_printf (MSG_NOTE, ", operands types: ");
2351           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2352                              TREE_TYPE (op1));
2353           dump_printf (MSG_NOTE, ",");
2354           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2355                              TREE_TYPE (op2));
2356           if (op3)
2357             {
2358               dump_printf (MSG_NOTE, ",");
2359               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2360                                  TREE_TYPE (op3));
2361             }
2362
2363           if (op4)
2364             {
2365               dump_printf (MSG_NOTE, ",");
2366               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2367                                  TREE_TYPE (op4));
2368             }
2369           dump_printf (MSG_NOTE, "\n");
2370         }
2371
2372       return NULL;
2373     }
2374
2375   /* Check that it's ok to change the order of the computation.
2376      Generally, when vectorizing a reduction we change the order of the
2377      computation.  This may change the behavior of the program in some
2378      cases, so we need to check that this is ok.  One exception is when
2379      vectorizing an outer-loop: the inner-loop is executed sequentially,
2380      and therefore vectorizing reductions in the inner-loop during
2381      outer-loop vectorization is safe.  */
2382
2383   /* CHECKME: check for !flag_finite_math_only too?  */
2384   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2385       && check_reduction)
2386     {
2387       /* Changing the order of operations changes the semantics.  */
2388       if (dump_enabled_p ())
2389         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2390                         "reduction: unsafe fp math optimization: ");
2391       return NULL;
2392     }
2393   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2394            && check_reduction)
2395     {
2396       /* Changing the order of operations changes the semantics.  */
2397       if (dump_enabled_p ())
2398         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2399                         "reduction: unsafe int math optimization: ");
2400       return NULL;
2401     }
2402   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2403     {
2404       /* Changing the order of operations changes the semantics.  */
2405       if (dump_enabled_p ())
2406         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2407                         "reduction: unsafe fixed-point math optimization: ");
2408       return NULL;
2409     }
2410
2411   /* If we detected "res -= x[i]" earlier, rewrite it into
2412      "res += -x[i]" now.  If this turns out to be useless reassoc
2413      will clean it up again.  */
2414   if (orig_code == MINUS_EXPR)
2415     {
2416       tree rhs = gimple_assign_rhs2 (def_stmt);
2417       tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2418       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2419                                                          rhs, NULL);
2420       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2421       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2422                                                           loop_info, NULL));
2423       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2424       gimple_assign_set_rhs2 (def_stmt, negrhs);
2425       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2426       update_stmt (def_stmt);
2427     }
2428
2429   /* Reduction is safe. We're dealing with one of the following:
2430      1) integer arithmetic and no trapv
2431      2) floating point arithmetic, and special flags permit this optimization
2432      3) nested cycle (i.e., outer loop vectorization).  */
2433   if (TREE_CODE (op1) == SSA_NAME)
2434     def1 = SSA_NAME_DEF_STMT (op1);
2435
2436   if (TREE_CODE (op2) == SSA_NAME)
2437     def2 = SSA_NAME_DEF_STMT (op2);
2438
2439   if (code != COND_EXPR
2440       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2441     {
2442       if (dump_enabled_p ())
2443         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2444       return NULL;
2445     }
2446
2447   /* Check that one def is the reduction def, defined by PHI,
2448      the other def is either defined in the loop ("vect_internal_def"),
2449      or it's an induction (defined by a loop-header phi-node).  */
2450
2451   if (def2 && def2 == phi
2452       && (code == COND_EXPR
2453           || !def1 || gimple_nop_p (def1)
2454           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2455               && (is_gimple_assign (def1)
2456                   || is_gimple_call (def1)
2457                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2458                       == vect_induction_def
2459                   || (gimple_code (def1) == GIMPLE_PHI
2460                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2461                           == vect_internal_def
2462                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2463     {
2464       if (dump_enabled_p ())
2465         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2466       return def_stmt;
2467     }
2468
2469   if (def1 && def1 == phi
2470       && (code == COND_EXPR
2471           || !def2 || gimple_nop_p (def2)
2472           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2473               && (is_gimple_assign (def2)
2474                   || is_gimple_call (def2)
2475                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2476                       == vect_induction_def
2477                   || (gimple_code (def2) == GIMPLE_PHI
2478                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2479                           == vect_internal_def
2480                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2481     {
2482       if (check_reduction)
2483         {
2484           /* Swap operands (just for simplicity - so that the rest of the code
2485              can assume that the reduction variable is always the last (second)
2486              argument).  */
2487           if (dump_enabled_p ())
2488             report_vect_op (MSG_NOTE, def_stmt,
2489                             "detected reduction: need to swap operands: ");
2490
2491           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2492                               gimple_assign_rhs2_ptr (def_stmt));
2493
2494           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2495             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2496         }
2497       else
2498         {
2499           if (dump_enabled_p ())
2500             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2501         }
2502
2503       return def_stmt;
2504     }
2505
2506   /* Try to find SLP reduction chain.  */
2507   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2508     {
2509       if (dump_enabled_p ())
2510         report_vect_op (MSG_NOTE, def_stmt,
2511                         "reduction: detected reduction chain: ");
2512
2513       return def_stmt;
2514     }
2515
2516   if (dump_enabled_p ())
2517     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2518                     "reduction: unknown pattern: ");
2519
2520   return NULL;
2521 }
2522
2523 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2524    in-place.  Arguments as there.  */
2525
2526 static gimple
2527 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2528                           bool check_reduction, bool *double_reduc)
2529 {
2530   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2531                                      double_reduc, false);
2532 }
2533
2534 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2535    in-place if it enables detection of more reductions.  Arguments
2536    as there.  */
2537
2538 gimple
2539 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2540                           bool check_reduction, bool *double_reduc)
2541 {
2542   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2543                                      double_reduc, true);
2544 }
2545
2546 /* Calculate the cost of one scalar iteration of the loop.  */
2547 int
2548 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2549 {
2550   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2551   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2552   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2553   int innerloop_iters, i, stmt_cost;
2554
2555   /* Count statements in scalar loop.  Using this as scalar cost for a single
2556      iteration for now.
2557
2558      TODO: Add outer loop support.
2559
2560      TODO: Consider assigning different costs to different scalar
2561      statements.  */
2562
2563   /* FORNOW.  */
2564   innerloop_iters = 1;
2565   if (loop->inner)
2566     innerloop_iters = 50; /* FIXME */
2567
2568   for (i = 0; i < nbbs; i++)
2569     {
2570       gimple_stmt_iterator si;
2571       basic_block bb = bbs[i];
2572
2573       if (bb->loop_father == loop->inner)
2574         factor = innerloop_iters;
2575       else
2576         factor = 1;
2577
2578       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2579         {
2580           gimple stmt = gsi_stmt (si);
2581           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2582
2583           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2584             continue;
2585
2586           /* Skip stmts that are not vectorized inside the loop.  */
2587           if (stmt_info
2588               && !STMT_VINFO_RELEVANT_P (stmt_info)
2589               && (!STMT_VINFO_LIVE_P (stmt_info)
2590                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2591               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2592             continue;
2593
2594           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2595             {
2596               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2597                stmt_cost = vect_get_stmt_cost (scalar_load);
2598              else
2599                stmt_cost = vect_get_stmt_cost (scalar_store);
2600             }
2601           else
2602             stmt_cost = vect_get_stmt_cost (scalar_stmt);
2603
2604           scalar_single_iter_cost += stmt_cost * factor;
2605         }
2606     }
2607   return scalar_single_iter_cost;
2608 }
2609
2610 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2611 int
2612 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2613                              int *peel_iters_epilogue,
2614                              int scalar_single_iter_cost,
2615                              stmt_vector_for_cost *prologue_cost_vec,
2616                              stmt_vector_for_cost *epilogue_cost_vec)
2617 {
2618   int retval = 0;
2619   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2620
2621   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2622     {
2623       *peel_iters_epilogue = vf/2;
2624       if (dump_enabled_p ())
2625         dump_printf_loc (MSG_NOTE, vect_location,
2626                          "cost model: epilogue peel iters set to vf/2 "
2627                          "because loop iterations are unknown .\n");
2628
2629       /* If peeled iterations are known but number of scalar loop
2630          iterations are unknown, count a taken branch per peeled loop.  */
2631       retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2632                                  NULL, 0, vect_prologue);
2633     }
2634   else
2635     {
2636       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2637       peel_iters_prologue = niters < peel_iters_prologue ?
2638                             niters : peel_iters_prologue;
2639       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2640       /* If we need to peel for gaps, but no peeling is required, we have to
2641          peel VF iterations.  */
2642       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2643         *peel_iters_epilogue = vf;
2644     }
2645
2646   if (peel_iters_prologue)
2647     retval += record_stmt_cost (prologue_cost_vec,
2648                                 peel_iters_prologue * scalar_single_iter_cost,
2649                                 scalar_stmt, NULL, 0, vect_prologue);
2650   if (*peel_iters_epilogue)
2651     retval += record_stmt_cost (epilogue_cost_vec,
2652                                 *peel_iters_epilogue * scalar_single_iter_cost,
2653                                 scalar_stmt, NULL, 0, vect_epilogue);
2654   return retval;
2655 }
2656
2657 /* Function vect_estimate_min_profitable_iters
2658
2659    Return the number of iterations required for the vector version of the
2660    loop to be profitable relative to the cost of the scalar version of the
2661    loop.  */
2662
2663 static void
2664 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2665                                     int *ret_min_profitable_niters,
2666                                     int *ret_min_profitable_estimate)
2667 {
2668   int min_profitable_iters;
2669   int min_profitable_estimate;
2670   int peel_iters_prologue;
2671   int peel_iters_epilogue;
2672   unsigned vec_inside_cost = 0;
2673   int vec_outside_cost = 0;
2674   unsigned vec_prologue_cost = 0;
2675   unsigned vec_epilogue_cost = 0;
2676   int scalar_single_iter_cost = 0;
2677   int scalar_outside_cost = 0;
2678   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2679   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2680   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2681
2682   /* Cost model disabled.  */
2683   if (!flag_vect_cost_model)
2684     {
2685       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2686       *ret_min_profitable_niters = 0;
2687       *ret_min_profitable_estimate = 0;
2688       return;
2689     }
2690
2691   /* Requires loop versioning tests to handle misalignment.  */
2692   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2693     {
2694       /*  FIXME: Make cost depend on complexity of individual check.  */
2695       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2696       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2697                             vect_prologue);
2698       dump_printf (MSG_NOTE,
2699                    "cost model: Adding cost of checks for loop "
2700                    "versioning to treat misalignment.\n");
2701     }
2702
2703   /* Requires loop versioning with alias checks.  */
2704   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2705     {
2706       /*  FIXME: Make cost depend on complexity of individual check.  */
2707       unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2708       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2709                             vect_prologue);
2710       dump_printf (MSG_NOTE,
2711                    "cost model: Adding cost of checks for loop "
2712                    "versioning aliasing.\n");
2713     }
2714
2715   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2716       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2717     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2718                           vect_prologue);
2719
2720   /* Count statements in scalar loop.  Using this as scalar cost for a single
2721      iteration for now.
2722
2723      TODO: Add outer loop support.
2724
2725      TODO: Consider assigning different costs to different scalar
2726      statements.  */
2727
2728   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2729
2730   /* Add additional cost for the peeled instructions in prologue and epilogue
2731      loop.
2732
2733      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2734      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2735
2736      TODO: Build an expression that represents peel_iters for prologue and
2737      epilogue to be used in a run-time test.  */
2738
2739   if (npeel  < 0)
2740     {
2741       peel_iters_prologue = vf/2;
2742       dump_printf (MSG_NOTE, "cost model: "
2743                    "prologue peel iters set to vf/2.\n");
2744
2745       /* If peeling for alignment is unknown, loop bound of main loop becomes
2746          unknown.  */
2747       peel_iters_epilogue = vf/2;
2748       dump_printf (MSG_NOTE, "cost model: "
2749                    "epilogue peel iters set to vf/2 because "
2750                    "peeling for alignment is unknown.\n");
2751
2752       /* If peeled iterations are unknown, count a taken branch and a not taken
2753          branch per peeled loop. Even if scalar loop iterations are known,
2754          vector iterations are not known since peeled prologue iterations are
2755          not known. Hence guards remain the same.  */
2756       (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2757                             NULL, 0, vect_prologue);
2758       (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2759                             NULL, 0, vect_prologue);
2760       /* FORNOW: Don't attempt to pass individual scalar instructions to
2761          the model; just assume linear cost for scalar iterations.  */
2762       (void) add_stmt_cost (target_cost_data,
2763                             peel_iters_prologue * scalar_single_iter_cost,
2764                             scalar_stmt, NULL, 0, vect_prologue);
2765       (void) add_stmt_cost (target_cost_data,
2766                             peel_iters_epilogue * scalar_single_iter_cost,
2767                             scalar_stmt, NULL, 0, vect_epilogue);
2768     }
2769   else
2770     {
2771       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2772       stmt_info_for_cost *si;
2773       int j;
2774       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2775
2776       prologue_cost_vec.create (2);
2777       epilogue_cost_vec.create (2);
2778       peel_iters_prologue = npeel;
2779
2780       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2781                                           &peel_iters_epilogue,
2782                                           scalar_single_iter_cost,
2783                                           &prologue_cost_vec,
2784                                           &epilogue_cost_vec);
2785
2786       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2787         {
2788           struct _stmt_vec_info *stmt_info
2789             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2790           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2791                                 si->misalign, vect_prologue);
2792         }
2793
2794       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2795         {
2796           struct _stmt_vec_info *stmt_info
2797             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2798           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2799                                 si->misalign, vect_epilogue);
2800         }
2801
2802       prologue_cost_vec.release ();
2803       epilogue_cost_vec.release ();
2804     }
2805
2806   /* FORNOW: The scalar outside cost is incremented in one of the
2807      following ways:
2808
2809      1. The vectorizer checks for alignment and aliasing and generates
2810      a condition that allows dynamic vectorization.  A cost model
2811      check is ANDED with the versioning condition.  Hence scalar code
2812      path now has the added cost of the versioning check.
2813
2814        if (cost > th & versioning_check)
2815          jmp to vector code
2816
2817      Hence run-time scalar is incremented by not-taken branch cost.
2818
2819      2. The vectorizer then checks if a prologue is required.  If the
2820      cost model check was not done before during versioning, it has to
2821      be done before the prologue check.
2822
2823        if (cost <= th)
2824          prologue = scalar_iters
2825        if (prologue == 0)
2826          jmp to vector code
2827        else
2828          execute prologue
2829        if (prologue == num_iters)
2830          go to exit
2831
2832      Hence the run-time scalar cost is incremented by a taken branch,
2833      plus a not-taken branch, plus a taken branch cost.
2834
2835      3. The vectorizer then checks if an epilogue is required.  If the
2836      cost model check was not done before during prologue check, it
2837      has to be done with the epilogue check.
2838
2839        if (prologue == 0)
2840          jmp to vector code
2841        else
2842          execute prologue
2843        if (prologue == num_iters)
2844          go to exit
2845        vector code:
2846          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2847            jmp to epilogue
2848
2849      Hence the run-time scalar cost should be incremented by 2 taken
2850      branches.
2851
2852      TODO: The back end may reorder the BBS's differently and reverse
2853      conditions/branch directions.  Change the estimates below to
2854      something more reasonable.  */
2855
2856   /* If the number of iterations is known and we do not do versioning, we can
2857      decide whether to vectorize at compile time.  Hence the scalar version
2858      do not carry cost model guard costs.  */
2859   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2860       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2861       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2862     {
2863       /* Cost model check occurs at versioning.  */
2864       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2865           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2866         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2867       else
2868         {
2869           /* Cost model check occurs at prologue generation.  */
2870           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2871             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2872               + vect_get_stmt_cost (cond_branch_not_taken);
2873           /* Cost model check occurs at epilogue generation.  */
2874           else
2875             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2876         }
2877     }
2878
2879   /* Complete the target-specific cost calculations.  */
2880   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2881                &vec_inside_cost, &vec_epilogue_cost);
2882
2883   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2884
2885   /* Calculate number of iterations required to make the vector version
2886      profitable, relative to the loop bodies only.  The following condition
2887      must hold true:
2888      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2889      where
2890      SIC = scalar iteration cost, VIC = vector iteration cost,
2891      VOC = vector outside cost, VF = vectorization factor,
2892      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2893      SOC = scalar outside cost for run time cost model check.  */
2894
2895   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2896     {
2897       if (vec_outside_cost <= 0)
2898         min_profitable_iters = 1;
2899       else
2900         {
2901           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2902                                   - vec_inside_cost * peel_iters_prologue
2903                                   - vec_inside_cost * peel_iters_epilogue)
2904                                  / ((scalar_single_iter_cost * vf)
2905                                     - vec_inside_cost);
2906
2907           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2908               <= (((int) vec_inside_cost * min_profitable_iters)
2909                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2910             min_profitable_iters++;
2911         }
2912     }
2913   /* vector version will never be profitable.  */
2914   else
2915     {
2916       if (dump_enabled_p ())
2917         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2918                          "cost model: the vector iteration cost = %d "
2919                          "divided by the scalar iteration cost = %d "
2920                          "is greater or equal to the vectorization factor = %d"
2921                          ".\n",
2922                          vec_inside_cost, scalar_single_iter_cost, vf);
2923       *ret_min_profitable_niters = -1;
2924       *ret_min_profitable_estimate = -1;
2925       return;
2926     }
2927
2928   if (dump_enabled_p ())
2929     {
2930       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2931       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
2932                    vec_inside_cost);
2933       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
2934                    vec_prologue_cost);
2935       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
2936                    vec_epilogue_cost);
2937       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
2938                    scalar_single_iter_cost);
2939       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
2940                    scalar_outside_cost);
2941       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
2942                    vec_outside_cost);
2943       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
2944                    peel_iters_prologue);
2945       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
2946                    peel_iters_epilogue);
2947       dump_printf (MSG_NOTE,
2948                    "  Calculated minimum iters for profitability: %d\n",
2949                    min_profitable_iters);
2950       dump_printf (MSG_NOTE, "\n");
2951     }
2952
2953   min_profitable_iters =
2954         min_profitable_iters < vf ? vf : min_profitable_iters;
2955
2956   /* Because the condition we create is:
2957      if (niters <= min_profitable_iters)
2958        then skip the vectorized loop.  */
2959   min_profitable_iters--;
2960
2961   if (dump_enabled_p ())
2962     dump_printf_loc (MSG_NOTE, vect_location,
2963                      "  Runtime profitability threshold = %d\n",
2964                      min_profitable_iters);
2965
2966   *ret_min_profitable_niters = min_profitable_iters;
2967
2968   /* Calculate number of iterations required to make the vector version
2969      profitable, relative to the loop bodies only.
2970
2971      Non-vectorized variant is SIC * niters and it must win over vector
2972      variant on the expected loop trip count.  The following condition must hold true:
2973      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
2974
2975   if (vec_outside_cost <= 0)
2976     min_profitable_estimate = 1;
2977   else
2978     {
2979       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2980                                  - vec_inside_cost * peel_iters_prologue
2981                                  - vec_inside_cost * peel_iters_epilogue)
2982                                  / ((scalar_single_iter_cost * vf)
2983                                    - vec_inside_cost);
2984     }
2985   min_profitable_estimate --;
2986   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
2987   if (dump_enabled_p ())
2988     dump_printf_loc (MSG_NOTE, vect_location,
2989                      "  Static estimate profitability threshold = %d\n",
2990                       min_profitable_iters);
2991
2992   *ret_min_profitable_estimate = min_profitable_estimate;
2993 }
2994
2995
2996 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2997    functions. Design better to avoid maintenance issues.  */
2998
2999 /* Function vect_model_reduction_cost.
3000
3001    Models cost for a reduction operation, including the vector ops
3002    generated within the strip-mine loop, the initial definition before
3003    the loop, and the epilogue code that must be generated.  */
3004
3005 static bool
3006 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3007                            int ncopies)
3008 {
3009   int prologue_cost = 0, epilogue_cost = 0;
3010   enum tree_code code;
3011   optab optab;
3012   tree vectype;
3013   gimple stmt, orig_stmt;
3014   tree reduction_op;
3015   enum machine_mode mode;
3016   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3017   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3018   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3019
3020   /* Cost of reduction op inside loop.  */
3021   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3022                                         stmt_info, 0, vect_body);
3023   stmt = STMT_VINFO_STMT (stmt_info);
3024
3025   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3026     {
3027     case GIMPLE_SINGLE_RHS:
3028       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
3029       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
3030       break;
3031     case GIMPLE_UNARY_RHS:
3032       reduction_op = gimple_assign_rhs1 (stmt);
3033       break;
3034     case GIMPLE_BINARY_RHS:
3035       reduction_op = gimple_assign_rhs2 (stmt);
3036       break;
3037     case GIMPLE_TERNARY_RHS:
3038       reduction_op = gimple_assign_rhs3 (stmt);
3039       break;
3040     default:
3041       gcc_unreachable ();
3042     }
3043
3044   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3045   if (!vectype)
3046     {
3047       if (dump_enabled_p ())
3048         {
3049           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3050                            "unsupported data-type ");
3051           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3052                              TREE_TYPE (reduction_op));
3053           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3054         }
3055       return false;
3056    }
3057
3058   mode = TYPE_MODE (vectype);
3059   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3060
3061   if (!orig_stmt)
3062     orig_stmt = STMT_VINFO_STMT (stmt_info);
3063
3064   code = gimple_assign_rhs_code (orig_stmt);
3065
3066   /* Add in cost for initial definition.  */
3067   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3068                                   stmt_info, 0, vect_prologue);
3069
3070   /* Determine cost of epilogue code.
3071
3072      We have a reduction operator that will reduce the vector in one statement.
3073      Also requires scalar extract.  */
3074
3075   if (!nested_in_vect_loop_p (loop, orig_stmt))
3076     {
3077       if (reduc_code != ERROR_MARK)
3078         {
3079           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3080                                           stmt_info, 0, vect_epilogue);
3081           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3082                                           stmt_info, 0, vect_epilogue);
3083         }
3084       else
3085         {
3086           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3087           tree bitsize =
3088             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3089           int element_bitsize = tree_low_cst (bitsize, 1);
3090           int nelements = vec_size_in_bits / element_bitsize;
3091
3092           optab = optab_for_tree_code (code, vectype, optab_default);
3093
3094           /* We have a whole vector shift available.  */
3095           if (VECTOR_MODE_P (mode)
3096               && optab_handler (optab, mode) != CODE_FOR_nothing
3097               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3098             {
3099               /* Final reduction via vector shifts and the reduction operator.
3100                  Also requires scalar extract.  */
3101               epilogue_cost += add_stmt_cost (target_cost_data,
3102                                               exact_log2 (nelements) * 2,
3103                                               vector_stmt, stmt_info, 0,
3104                                               vect_epilogue);
3105               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3106                                               vec_to_scalar, stmt_info, 0,
3107                                               vect_epilogue);
3108             }
3109           else
3110             /* Use extracts and reduction op for final reduction.  For N
3111                elements, we have N extracts and N-1 reduction ops.  */
3112             epilogue_cost += add_stmt_cost (target_cost_data,
3113                                             nelements + nelements - 1,
3114                                             vector_stmt, stmt_info, 0,
3115                                             vect_epilogue);
3116         }
3117     }
3118
3119   if (dump_enabled_p ())
3120     dump_printf (MSG_NOTE,
3121                  "vect_model_reduction_cost: inside_cost = %d, "
3122                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3123                  prologue_cost, epilogue_cost);
3124
3125   return true;
3126 }
3127
3128
3129 /* Function vect_model_induction_cost.
3130
3131    Models cost for induction operations.  */
3132
3133 static void
3134 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3135 {
3136   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3137   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3138   unsigned inside_cost, prologue_cost;
3139
3140   /* loop cost for vec_loop.  */
3141   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3142                                stmt_info, 0, vect_body);
3143
3144   /* prologue cost for vec_init and vec_step.  */
3145   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3146                                  stmt_info, 0, vect_prologue);
3147
3148   if (dump_enabled_p ())
3149     dump_printf_loc (MSG_NOTE, vect_location,
3150                      "vect_model_induction_cost: inside_cost = %d, "
3151                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3152 }
3153
3154
3155 /* Function get_initial_def_for_induction
3156
3157    Input:
3158    STMT - a stmt that performs an induction operation in the loop.
3159    IV_PHI - the initial value of the induction variable
3160
3161    Output:
3162    Return a vector variable, initialized with the first VF values of
3163    the induction variable.  E.g., for an iv with IV_PHI='X' and
3164    evolution S, for a vector of 4 units, we want to return:
3165    [X, X + S, X + 2*S, X + 3*S].  */
3166
3167 static tree
3168 get_initial_def_for_induction (gimple iv_phi)
3169 {
3170   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3171   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3173   tree vectype;
3174   int nunits;
3175   edge pe = loop_preheader_edge (loop);
3176   struct loop *iv_loop;
3177   basic_block new_bb;
3178   tree new_vec, vec_init, vec_step, t;
3179   tree access_fn;
3180   tree new_var;
3181   tree new_name;
3182   gimple init_stmt, induction_phi, new_stmt;
3183   tree induc_def, vec_def, vec_dest;
3184   tree init_expr, step_expr;
3185   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3186   int i;
3187   bool ok;
3188   int ncopies;
3189   tree expr;
3190   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3191   bool nested_in_vect_loop = false;
3192   gimple_seq stmts = NULL;
3193   imm_use_iterator imm_iter;
3194   use_operand_p use_p;
3195   gimple exit_phi;
3196   edge latch_e;
3197   tree loop_arg;
3198   gimple_stmt_iterator si;
3199   basic_block bb = gimple_bb (iv_phi);
3200   tree stepvectype;
3201   tree resvectype;
3202
3203   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3204   if (nested_in_vect_loop_p (loop, iv_phi))
3205     {
3206       nested_in_vect_loop = true;
3207       iv_loop = loop->inner;
3208     }
3209   else
3210     iv_loop = loop;
3211   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3212
3213   latch_e = loop_latch_edge (iv_loop);
3214   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3215
3216   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3217   gcc_assert (access_fn);
3218   STRIP_NOPS (access_fn);
3219   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3220                                     &init_expr, &step_expr);
3221   gcc_assert (ok);
3222   pe = loop_preheader_edge (iv_loop);
3223
3224   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3225   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3226   gcc_assert (vectype);
3227   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3228   ncopies = vf / nunits;
3229
3230   gcc_assert (phi_info);
3231   gcc_assert (ncopies >= 1);
3232
3233   /* Find the first insertion point in the BB.  */
3234   si = gsi_after_labels (bb);
3235
3236   /* Create the vector that holds the initial_value of the induction.  */
3237   if (nested_in_vect_loop)
3238     {
3239       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3240          been created during vectorization of previous stmts.  We obtain it
3241          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3242       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3243                                            loop_preheader_edge (iv_loop));
3244       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3245       /* If the initial value is not of proper type, convert it.  */
3246       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3247         {
3248           new_stmt = gimple_build_assign_with_ops
3249               (VIEW_CONVERT_EXPR,
3250                vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3251                build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3252           vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3253           gimple_assign_set_lhs (new_stmt, vec_init);
3254           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3255                                                  new_stmt);
3256           gcc_assert (!new_bb);
3257           set_vinfo_for_stmt (new_stmt,
3258                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3259         }
3260     }
3261   else
3262     {
3263       vec<constructor_elt, va_gc> *v;
3264
3265       /* iv_loop is the loop to be vectorized. Create:
3266          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3267       new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3268                                        vect_scalar_var, "var_");
3269       new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3270                                                      init_expr),
3271                                        &stmts, false, new_var);
3272       if (stmts)
3273         {
3274           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3275           gcc_assert (!new_bb);
3276         }
3277
3278       vec_alloc (v, nunits);
3279       bool constant_p = is_gimple_min_invariant (new_name);
3280       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3281       for (i = 1; i < nunits; i++)
3282         {
3283           /* Create: new_name_i = new_name + step_expr  */
3284           new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3285                                   new_name, step_expr);
3286           if (!is_gimple_min_invariant (new_name))
3287             {
3288               init_stmt = gimple_build_assign (new_var, new_name);
3289               new_name = make_ssa_name (new_var, init_stmt);
3290               gimple_assign_set_lhs (init_stmt, new_name);
3291               new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3292               gcc_assert (!new_bb);
3293               if (dump_enabled_p ())
3294                 {
3295                   dump_printf_loc (MSG_NOTE, vect_location,
3296                                    "created new init_stmt: ");
3297                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3298                   dump_printf (MSG_NOTE, "\n");
3299                 }
3300               constant_p = false;
3301             }
3302           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3303         }
3304       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3305       if (constant_p)
3306         new_vec = build_vector_from_ctor (vectype, v);
3307       else
3308         new_vec = build_constructor (vectype, v);
3309       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3310     }
3311
3312
3313   /* Create the vector that holds the step of the induction.  */
3314   if (nested_in_vect_loop)
3315     /* iv_loop is nested in the loop to be vectorized. Generate:
3316        vec_step = [S, S, S, S]  */
3317     new_name = step_expr;
3318   else
3319     {
3320       /* iv_loop is the loop to be vectorized. Generate:
3321           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3322       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3323         {
3324           expr = build_int_cst (integer_type_node, vf);
3325           expr = fold_convert (TREE_TYPE (step_expr), expr);
3326         }
3327       else
3328         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3329       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3330                               expr, step_expr);
3331       if (TREE_CODE (step_expr) == SSA_NAME)
3332         new_name = vect_init_vector (iv_phi, new_name,
3333                                      TREE_TYPE (step_expr), NULL);
3334     }
3335
3336   t = unshare_expr (new_name);
3337   gcc_assert (CONSTANT_CLASS_P (new_name)
3338               || TREE_CODE (new_name) == SSA_NAME);
3339   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3340   gcc_assert (stepvectype);
3341   new_vec = build_vector_from_val (stepvectype, t);
3342   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3343
3344
3345   /* Create the following def-use cycle:
3346      loop prolog:
3347          vec_init = ...
3348          vec_step = ...
3349      loop:
3350          vec_iv = PHI <vec_init, vec_loop>
3351          ...
3352          STMT
3353          ...
3354          vec_loop = vec_iv + vec_step;  */
3355
3356   /* Create the induction-phi that defines the induction-operand.  */
3357   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3358   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3359   set_vinfo_for_stmt (induction_phi,
3360                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3361   induc_def = PHI_RESULT (induction_phi);
3362
3363   /* Create the iv update inside the loop  */
3364   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3365                                            induc_def, vec_step);
3366   vec_def = make_ssa_name (vec_dest, new_stmt);
3367   gimple_assign_set_lhs (new_stmt, vec_def);
3368   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3369   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3370                                                    NULL));
3371
3372   /* Set the arguments of the phi node:  */
3373   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3374   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3375                UNKNOWN_LOCATION);
3376
3377
3378   /* In case that vectorization factor (VF) is bigger than the number
3379      of elements that we can fit in a vectype (nunits), we have to generate
3380      more than one vector stmt - i.e - we need to "unroll" the
3381      vector stmt by a factor VF/nunits.  For more details see documentation
3382      in vectorizable_operation.  */
3383
3384   if (ncopies > 1)
3385     {
3386       stmt_vec_info prev_stmt_vinfo;
3387       /* FORNOW. This restriction should be relaxed.  */
3388       gcc_assert (!nested_in_vect_loop);
3389
3390       /* Create the vector that holds the step of the induction.  */
3391       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3392         {
3393           expr = build_int_cst (integer_type_node, nunits);
3394           expr = fold_convert (TREE_TYPE (step_expr), expr);
3395         }
3396       else
3397         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3398       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3399                               expr, step_expr);
3400       if (TREE_CODE (step_expr) == SSA_NAME)
3401         new_name = vect_init_vector (iv_phi, new_name,
3402                                      TREE_TYPE (step_expr), NULL);
3403       t = unshare_expr (new_name);
3404       gcc_assert (CONSTANT_CLASS_P (new_name)
3405                   || TREE_CODE (new_name) == SSA_NAME);
3406       new_vec = build_vector_from_val (stepvectype, t);
3407       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3408
3409       vec_def = induc_def;
3410       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3411       for (i = 1; i < ncopies; i++)
3412         {
3413           /* vec_i = vec_prev + vec_step  */
3414           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3415                                                    vec_def, vec_step);
3416           vec_def = make_ssa_name (vec_dest, new_stmt);
3417           gimple_assign_set_lhs (new_stmt, vec_def);
3418
3419           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3420           if (!useless_type_conversion_p (resvectype, vectype))
3421             {
3422               new_stmt = gimple_build_assign_with_ops
3423                   (VIEW_CONVERT_EXPR,
3424                    vect_get_new_vect_var (resvectype, vect_simple_var,
3425                                           "vec_iv_"),
3426                    build1 (VIEW_CONVERT_EXPR, resvectype,
3427                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3428               gimple_assign_set_lhs (new_stmt,
3429                                      make_ssa_name
3430                                        (gimple_assign_lhs (new_stmt), new_stmt));
3431               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3432             }
3433           set_vinfo_for_stmt (new_stmt,
3434                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3435           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3436           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3437         }
3438     }
3439
3440   if (nested_in_vect_loop)
3441     {
3442       /* Find the loop-closed exit-phi of the induction, and record
3443          the final vector of induction results:  */
3444       exit_phi = NULL;
3445       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3446         {
3447           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3448             {
3449               exit_phi = USE_STMT (use_p);
3450               break;
3451             }
3452         }
3453       if (exit_phi)
3454         {
3455           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3456           /* FORNOW. Currently not supporting the case that an inner-loop induction
3457              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3458           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3459                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3460
3461           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3462           if (dump_enabled_p ())
3463             {
3464               dump_printf_loc (MSG_NOTE, vect_location,
3465                                "vector of inductions after inner-loop:");
3466               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3467               dump_printf (MSG_NOTE, "\n");
3468             }
3469         }
3470     }
3471
3472
3473   if (dump_enabled_p ())
3474     {
3475       dump_printf_loc (MSG_NOTE, vect_location,
3476                        "transform induction: created def-use cycle: ");
3477       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3478       dump_printf (MSG_NOTE, "\n");
3479       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3480                         SSA_NAME_DEF_STMT (vec_def), 0);
3481       dump_printf (MSG_NOTE, "\n");
3482     }
3483
3484   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3485   if (!useless_type_conversion_p (resvectype, vectype))
3486     {
3487       new_stmt = gimple_build_assign_with_ops
3488          (VIEW_CONVERT_EXPR,
3489           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3490           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3491       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3492       gimple_assign_set_lhs (new_stmt, induc_def);
3493       si = gsi_after_labels (bb);
3494       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3495       set_vinfo_for_stmt (new_stmt,
3496                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3497       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3498         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3499     }
3500
3501   return induc_def;
3502 }
3503
3504
3505 /* Function get_initial_def_for_reduction
3506
3507    Input:
3508    STMT - a stmt that performs a reduction operation in the loop.
3509    INIT_VAL - the initial value of the reduction variable
3510
3511    Output:
3512    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3513         of the reduction (used for adjusting the epilog - see below).
3514    Return a vector variable, initialized according to the operation that STMT
3515         performs. This vector will be used as the initial value of the
3516         vector of partial results.
3517
3518    Option1 (adjust in epilog): Initialize the vector as follows:
3519      add/bit or/xor:    [0,0,...,0,0]
3520      mult/bit and:      [1,1,...,1,1]
3521      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3522    and when necessary (e.g. add/mult case) let the caller know
3523    that it needs to adjust the result by init_val.
3524
3525    Option2: Initialize the vector as follows:
3526      add/bit or/xor:    [init_val,0,0,...,0]
3527      mult/bit and:      [init_val,1,1,...,1]
3528      min/max/cond_expr: [init_val,init_val,...,init_val]
3529    and no adjustments are needed.
3530
3531    For example, for the following code:
3532
3533    s = init_val;
3534    for (i=0;i<n;i++)
3535      s = s + a[i];
3536
3537    STMT is 's = s + a[i]', and the reduction variable is 's'.
3538    For a vector of 4 units, we want to return either [0,0,0,init_val],
3539    or [0,0,0,0] and let the caller know that it needs to adjust
3540    the result at the end by 'init_val'.
3541
3542    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3543    initialization vector is simpler (same element in all entries), if
3544    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3545
3546    A cost model should help decide between these two schemes.  */
3547
3548 tree
3549 get_initial_def_for_reduction (gimple stmt, tree init_val,
3550                                tree *adjustment_def)
3551 {
3552   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3553   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3554   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3555   tree scalar_type = TREE_TYPE (init_val);
3556   tree vectype = get_vectype_for_scalar_type (scalar_type);
3557   int nunits;
3558   enum tree_code code = gimple_assign_rhs_code (stmt);
3559   tree def_for_init;
3560   tree init_def;
3561   tree *elts;
3562   int i;
3563   bool nested_in_vect_loop = false;
3564   tree init_value;
3565   REAL_VALUE_TYPE real_init_val = dconst0;
3566   int int_init_val = 0;
3567   gimple def_stmt = NULL;
3568
3569   gcc_assert (vectype);
3570   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3571
3572   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3573               || SCALAR_FLOAT_TYPE_P (scalar_type));
3574
3575   if (nested_in_vect_loop_p (loop, stmt))
3576     nested_in_vect_loop = true;
3577   else
3578     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3579
3580   /* In case of double reduction we only create a vector variable to be put
3581      in the reduction phi node.  The actual statement creation is done in
3582      vect_create_epilog_for_reduction.  */
3583   if (adjustment_def && nested_in_vect_loop
3584       && TREE_CODE (init_val) == SSA_NAME
3585       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3586       && gimple_code (def_stmt) == GIMPLE_PHI
3587       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3588       && vinfo_for_stmt (def_stmt)
3589       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3590           == vect_double_reduction_def)
3591     {
3592       *adjustment_def = NULL;
3593       return vect_create_destination_var (init_val, vectype);
3594     }
3595
3596   if (TREE_CONSTANT (init_val))
3597     {
3598       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3599         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3600       else
3601         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3602     }
3603   else
3604     init_value = init_val;
3605
3606   switch (code)
3607     {
3608       case WIDEN_SUM_EXPR:
3609       case DOT_PROD_EXPR:
3610       case PLUS_EXPR:
3611       case MINUS_EXPR:
3612       case BIT_IOR_EXPR:
3613       case BIT_XOR_EXPR:
3614       case MULT_EXPR:
3615       case BIT_AND_EXPR:
3616         /* ADJUSMENT_DEF is NULL when called from
3617            vect_create_epilog_for_reduction to vectorize double reduction.  */
3618         if (adjustment_def)
3619           {
3620             if (nested_in_vect_loop)
3621               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3622                                                               NULL);
3623             else
3624               *adjustment_def = init_val;
3625           }
3626
3627         if (code == MULT_EXPR)
3628           {
3629             real_init_val = dconst1;
3630             int_init_val = 1;
3631           }
3632
3633         if (code == BIT_AND_EXPR)
3634           int_init_val = -1;
3635
3636         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3637           def_for_init = build_real (scalar_type, real_init_val);
3638         else
3639           def_for_init = build_int_cst (scalar_type, int_init_val);
3640
3641         /* Create a vector of '0' or '1' except the first element.  */
3642         elts = XALLOCAVEC (tree, nunits);
3643         for (i = nunits - 2; i >= 0; --i)
3644           elts[i + 1] = def_for_init;
3645
3646         /* Option1: the first element is '0' or '1' as well.  */
3647         if (adjustment_def)
3648           {
3649             elts[0] = def_for_init;
3650             init_def = build_vector (vectype, elts);
3651             break;
3652           }
3653
3654         /* Option2: the first element is INIT_VAL.  */
3655         elts[0] = init_val;
3656         if (TREE_CONSTANT (init_val))
3657           init_def = build_vector (vectype, elts);
3658         else
3659           {
3660             vec<constructor_elt, va_gc> *v;
3661             vec_alloc (v, nunits);
3662             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3663             for (i = 1; i < nunits; ++i)
3664               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3665             init_def = build_constructor (vectype, v);
3666           }
3667
3668         break;
3669
3670       case MIN_EXPR:
3671       case MAX_EXPR:
3672       case COND_EXPR:
3673         if (adjustment_def)
3674           {
3675             *adjustment_def = NULL_TREE;
3676             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3677             break;
3678           }
3679
3680         init_def = build_vector_from_val (vectype, init_value);
3681         break;
3682
3683       default:
3684         gcc_unreachable ();
3685     }
3686
3687   return init_def;
3688 }
3689
3690
3691 /* Function vect_create_epilog_for_reduction
3692
3693    Create code at the loop-epilog to finalize the result of a reduction
3694    computation.
3695
3696    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3697      reduction statements.
3698    STMT is the scalar reduction stmt that is being vectorized.
3699    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3700      number of elements that we can fit in a vectype (nunits).  In this case
3701      we have to generate more than one vector stmt - i.e - we need to "unroll"
3702      the vector stmt by a factor VF/nunits.  For more details see documentation
3703      in vectorizable_operation.
3704    REDUC_CODE is the tree-code for the epilog reduction.
3705    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3706      computation.
3707    REDUC_INDEX is the index of the operand in the right hand side of the
3708      statement that is defined by REDUCTION_PHI.
3709    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3710    SLP_NODE is an SLP node containing a group of reduction statements. The
3711      first one in this group is STMT.
3712
3713    This function:
3714    1. Creates the reduction def-use cycles: sets the arguments for
3715       REDUCTION_PHIS:
3716       The loop-entry argument is the vectorized initial-value of the reduction.
3717       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3718       sums.
3719    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3720       by applying the operation specified by REDUC_CODE if available, or by
3721       other means (whole-vector shifts or a scalar loop).
3722       The function also creates a new phi node at the loop exit to preserve
3723       loop-closed form, as illustrated below.
3724
3725      The flow at the entry to this function:
3726
3727         loop:
3728           vec_def = phi <null, null>            # REDUCTION_PHI
3729           VECT_DEF = vector_stmt                # vectorized form of STMT
3730           s_loop = scalar_stmt                  # (scalar) STMT
3731         loop_exit:
3732           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3733           use <s_out0>
3734           use <s_out0>
3735
3736      The above is transformed by this function into:
3737
3738         loop:
3739           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3740           VECT_DEF = vector_stmt                # vectorized form of STMT
3741           s_loop = scalar_stmt                  # (scalar) STMT
3742         loop_exit:
3743           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3744           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3745           v_out2 = reduce <v_out1>
3746           s_out3 = extract_field <v_out2, 0>
3747           s_out4 = adjust_result <s_out3>
3748           use <s_out4>
3749           use <s_out4>
3750 */
3751
3752 static void
3753 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3754                                   int ncopies, enum tree_code reduc_code,
3755                                   vec<gimple> reduction_phis,
3756                                   int reduc_index, bool double_reduc,
3757                                   slp_tree slp_node)
3758 {
3759   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3760   stmt_vec_info prev_phi_info;
3761   tree vectype;
3762   enum machine_mode mode;
3763   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3764   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3765   basic_block exit_bb;
3766   tree scalar_dest;
3767   tree scalar_type;
3768   gimple new_phi = NULL, phi;
3769   gimple_stmt_iterator exit_gsi;
3770   tree vec_dest;
3771   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3772   gimple epilog_stmt = NULL;
3773   enum tree_code code = gimple_assign_rhs_code (stmt);
3774   gimple exit_phi;
3775   tree bitsize, bitpos;
3776   tree adjustment_def = NULL;
3777   tree vec_initial_def = NULL;
3778   tree reduction_op, expr, def;
3779   tree orig_name, scalar_result;
3780   imm_use_iterator imm_iter, phi_imm_iter;
3781   use_operand_p use_p, phi_use_p;
3782   bool extract_scalar_result = false;
3783   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3784   bool nested_in_vect_loop = false;
3785   vec<gimple> new_phis = vNULL;
3786   vec<gimple> inner_phis = vNULL;
3787   enum vect_def_type dt = vect_unknown_def_type;
3788   int j, i;
3789   vec<tree> scalar_results = vNULL;
3790   unsigned int group_size = 1, k, ratio;
3791   vec<tree> vec_initial_defs = vNULL;
3792   vec<gimple> phis;
3793   bool slp_reduc = false;
3794   tree new_phi_result;
3795   gimple inner_phi = NULL;
3796
3797   if (slp_node)
3798     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3799
3800   if (nested_in_vect_loop_p (loop, stmt))
3801     {
3802       outer_loop = loop;
3803       loop = loop->inner;
3804       nested_in_vect_loop = true;
3805       gcc_assert (!slp_node);
3806     }
3807
3808   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3809     {
3810     case GIMPLE_SINGLE_RHS:
3811       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3812                   == ternary_op);
3813       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3814       break;
3815     case GIMPLE_UNARY_RHS:
3816       reduction_op = gimple_assign_rhs1 (stmt);
3817       break;
3818     case GIMPLE_BINARY_RHS:
3819       reduction_op = reduc_index ?
3820                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3821       break;
3822     case GIMPLE_TERNARY_RHS:
3823       reduction_op = gimple_op (stmt, reduc_index + 1);
3824       break;
3825     default:
3826       gcc_unreachable ();
3827     }
3828
3829   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3830   gcc_assert (vectype);
3831   mode = TYPE_MODE (vectype);
3832
3833   /* 1. Create the reduction def-use cycle:
3834      Set the arguments of REDUCTION_PHIS, i.e., transform
3835
3836         loop:
3837           vec_def = phi <null, null>            # REDUCTION_PHI
3838           VECT_DEF = vector_stmt                # vectorized form of STMT
3839           ...
3840
3841      into:
3842
3843         loop:
3844           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3845           VECT_DEF = vector_stmt                # vectorized form of STMT
3846           ...
3847
3848      (in case of SLP, do it for all the phis). */
3849
3850   /* Get the loop-entry arguments.  */
3851   if (slp_node)
3852     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3853                        NULL, slp_node, reduc_index);
3854   else
3855     {
3856       vec_initial_defs.create (1);
3857      /* For the case of reduction, vect_get_vec_def_for_operand returns
3858         the scalar def before the loop, that defines the initial value
3859         of the reduction variable.  */
3860       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3861                                                       &adjustment_def);
3862       vec_initial_defs.quick_push (vec_initial_def);
3863     }
3864
3865   /* Set phi nodes arguments.  */
3866   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3867     {
3868       tree vec_init_def = vec_initial_defs[i];
3869       tree def = vect_defs[i];
3870       for (j = 0; j < ncopies; j++)
3871         {
3872           /* Set the loop-entry arg of the reduction-phi.  */
3873           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3874                        UNKNOWN_LOCATION);
3875
3876           /* Set the loop-latch arg for the reduction-phi.  */
3877           if (j > 0)
3878             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3879
3880           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3881
3882           if (dump_enabled_p ())
3883             {
3884               dump_printf_loc (MSG_NOTE, vect_location,
3885                                "transform reduction: created def-use cycle: ");
3886               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3887               dump_printf (MSG_NOTE, "\n");
3888               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3889               dump_printf (MSG_NOTE, "\n");
3890             }
3891
3892           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3893         }
3894     }
3895
3896   vec_initial_defs.release ();
3897
3898   /* 2. Create epilog code.
3899         The reduction epilog code operates across the elements of the vector
3900         of partial results computed by the vectorized loop.
3901         The reduction epilog code consists of:
3902
3903         step 1: compute the scalar result in a vector (v_out2)
3904         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3905         step 3: adjust the scalar result (s_out3) if needed.
3906
3907         Step 1 can be accomplished using one the following three schemes:
3908           (scheme 1) using reduc_code, if available.
3909           (scheme 2) using whole-vector shifts, if available.
3910           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3911                      combined.
3912
3913           The overall epilog code looks like this:
3914
3915           s_out0 = phi <s_loop>         # original EXIT_PHI
3916           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3917           v_out2 = reduce <v_out1>              # step 1
3918           s_out3 = extract_field <v_out2, 0>    # step 2
3919           s_out4 = adjust_result <s_out3>       # step 3
3920
3921           (step 3 is optional, and steps 1 and 2 may be combined).
3922           Lastly, the uses of s_out0 are replaced by s_out4.  */
3923
3924
3925   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3926          v_out1 = phi <VECT_DEF>
3927          Store them in NEW_PHIS.  */
3928
3929   exit_bb = single_exit (loop)->dest;
3930   prev_phi_info = NULL;
3931   new_phis.create (vect_defs.length ());
3932   FOR_EACH_VEC_ELT (vect_defs, i, def)
3933     {
3934       for (j = 0; j < ncopies; j++)
3935         {
3936           tree new_def = copy_ssa_name (def, NULL);
3937           phi = create_phi_node (new_def, exit_bb);
3938           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3939           if (j == 0)
3940             new_phis.quick_push (phi);
3941           else
3942             {
3943               def = vect_get_vec_def_for_stmt_copy (dt, def);
3944               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3945             }
3946
3947           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3948           prev_phi_info = vinfo_for_stmt (phi);
3949         }
3950     }
3951
3952   /* The epilogue is created for the outer-loop, i.e., for the loop being
3953      vectorized.  Create exit phis for the outer loop.  */
3954   if (double_reduc)
3955     {
3956       loop = outer_loop;
3957       exit_bb = single_exit (loop)->dest;
3958       inner_phis.create (vect_defs.length ());
3959       FOR_EACH_VEC_ELT (new_phis, i, phi)
3960         {
3961           tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3962           gimple outer_phi = create_phi_node (new_result, exit_bb);
3963           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3964                            PHI_RESULT (phi));
3965           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3966                                                             loop_vinfo, NULL));
3967           inner_phis.quick_push (phi);
3968           new_phis[i] = outer_phi;
3969           prev_phi_info = vinfo_for_stmt (outer_phi);
3970           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3971             {
3972               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3973               new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3974               outer_phi = create_phi_node (new_result, exit_bb);
3975               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3976                                PHI_RESULT (phi));
3977               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3978                                                         loop_vinfo, NULL));
3979               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3980               prev_phi_info = vinfo_for_stmt (outer_phi);
3981             }
3982         }
3983     }
3984
3985   exit_gsi = gsi_after_labels (exit_bb);
3986
3987   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3988          (i.e. when reduc_code is not available) and in the final adjustment
3989          code (if needed).  Also get the original scalar reduction variable as
3990          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3991          represents a reduction pattern), the tree-code and scalar-def are
3992          taken from the original stmt that the pattern-stmt (STMT) replaces.
3993          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3994          are taken from STMT.  */
3995
3996   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3997   if (!orig_stmt)
3998     {
3999       /* Regular reduction  */
4000       orig_stmt = stmt;
4001     }
4002   else
4003     {
4004       /* Reduction pattern  */
4005       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4006       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4007       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4008     }
4009
4010   code = gimple_assign_rhs_code (orig_stmt);
4011   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4012      partial results are added and not subtracted.  */
4013   if (code == MINUS_EXPR)
4014     code = PLUS_EXPR;
4015
4016   scalar_dest = gimple_assign_lhs (orig_stmt);
4017   scalar_type = TREE_TYPE (scalar_dest);
4018   scalar_results.create (group_size);
4019   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4020   bitsize = TYPE_SIZE (scalar_type);
4021
4022   /* In case this is a reduction in an inner-loop while vectorizing an outer
4023      loop - we don't need to extract a single scalar result at the end of the
4024      inner-loop (unless it is double reduction, i.e., the use of reduction is
4025      outside the outer-loop).  The final vector of partial results will be used
4026      in the vectorized outer-loop, or reduced to a scalar result at the end of
4027      the outer-loop.  */
4028   if (nested_in_vect_loop && !double_reduc)
4029     goto vect_finalize_reduction;
4030
4031   /* SLP reduction without reduction chain, e.g.,
4032      # a1 = phi <a2, a0>
4033      # b1 = phi <b2, b0>
4034      a2 = operation (a1)
4035      b2 = operation (b1)  */
4036   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4037
4038   /* In case of reduction chain, e.g.,
4039      # a1 = phi <a3, a0>
4040      a2 = operation (a1)
4041      a3 = operation (a2),
4042
4043      we may end up with more than one vector result.  Here we reduce them to
4044      one vector.  */
4045   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4046     {
4047       tree first_vect = PHI_RESULT (new_phis[0]);
4048       tree tmp;
4049       gimple new_vec_stmt = NULL;
4050
4051       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4052       for (k = 1; k < new_phis.length (); k++)
4053         {
4054           gimple next_phi = new_phis[k];
4055           tree second_vect = PHI_RESULT (next_phi);
4056
4057           tmp = build2 (code, vectype,  first_vect, second_vect);
4058           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4059           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4060           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4061           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4062         }
4063
4064       new_phi_result = first_vect;
4065       if (new_vec_stmt)
4066         {
4067           new_phis.truncate (0);
4068           new_phis.safe_push (new_vec_stmt);
4069         }
4070     }
4071   else
4072     new_phi_result = PHI_RESULT (new_phis[0]);
4073
4074   /* 2.3 Create the reduction code, using one of the three schemes described
4075          above. In SLP we simply need to extract all the elements from the
4076          vector (without reducing them), so we use scalar shifts.  */
4077   if (reduc_code != ERROR_MARK && !slp_reduc)
4078     {
4079       tree tmp;
4080
4081       /*** Case 1:  Create:
4082            v_out2 = reduc_expr <v_out1>  */
4083
4084       if (dump_enabled_p ())
4085         dump_printf_loc (MSG_NOTE, vect_location,
4086                          "Reduce using direct vector reduction.\n");
4087
4088       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4089       tmp = build1 (reduc_code, vectype, new_phi_result);
4090       epilog_stmt = gimple_build_assign (vec_dest, tmp);
4091       new_temp = make_ssa_name (vec_dest, epilog_stmt);
4092       gimple_assign_set_lhs (epilog_stmt, new_temp);
4093       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4094
4095       extract_scalar_result = true;
4096     }
4097   else
4098     {
4099       enum tree_code shift_code = ERROR_MARK;
4100       bool have_whole_vector_shift = true;
4101       int bit_offset;
4102       int element_bitsize = tree_low_cst (bitsize, 1);
4103       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4104       tree vec_temp;
4105
4106       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4107         shift_code = VEC_RSHIFT_EXPR;
4108       else
4109         have_whole_vector_shift = false;
4110
4111       /* Regardless of whether we have a whole vector shift, if we're
4112          emulating the operation via tree-vect-generic, we don't want
4113          to use it.  Only the first round of the reduction is likely
4114          to still be profitable via emulation.  */
4115       /* ??? It might be better to emit a reduction tree code here, so that
4116          tree-vect-generic can expand the first round via bit tricks.  */
4117       if (!VECTOR_MODE_P (mode))
4118         have_whole_vector_shift = false;
4119       else
4120         {
4121           optab optab = optab_for_tree_code (code, vectype, optab_default);
4122           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4123             have_whole_vector_shift = false;
4124         }
4125
4126       if (have_whole_vector_shift && !slp_reduc)
4127         {
4128           /*** Case 2: Create:
4129              for (offset = VS/2; offset >= element_size; offset/=2)
4130                 {
4131                   Create:  va' = vec_shift <va, offset>
4132                   Create:  va = vop <va, va'>
4133                 }  */
4134
4135           if (dump_enabled_p ())
4136             dump_printf_loc (MSG_NOTE, vect_location,
4137                              "Reduce using vector shifts\n");
4138
4139           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4140           new_temp = new_phi_result;
4141           for (bit_offset = vec_size_in_bits/2;
4142                bit_offset >= element_bitsize;
4143                bit_offset /= 2)
4144             {
4145               tree bitpos = size_int (bit_offset);
4146
4147               epilog_stmt = gimple_build_assign_with_ops (shift_code,
4148                                                vec_dest, new_temp, bitpos);
4149               new_name = make_ssa_name (vec_dest, epilog_stmt);
4150               gimple_assign_set_lhs (epilog_stmt, new_name);
4151               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4152
4153               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4154                                                           new_name, new_temp);
4155               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4156               gimple_assign_set_lhs (epilog_stmt, new_temp);
4157               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4158             }
4159
4160           extract_scalar_result = true;
4161         }
4162       else
4163         {
4164           tree rhs;
4165
4166           /*** Case 3: Create:
4167              s = extract_field <v_out2, 0>
4168              for (offset = element_size;
4169                   offset < vector_size;
4170                   offset += element_size;)
4171                {
4172                  Create:  s' = extract_field <v_out2, offset>
4173                  Create:  s = op <s, s'>  // For non SLP cases
4174                }  */
4175
4176           if (dump_enabled_p ())
4177             dump_printf_loc (MSG_NOTE, vect_location,
4178                              "Reduce using scalar code.\n");
4179
4180           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4181           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4182             {
4183               if (gimple_code (new_phi) == GIMPLE_PHI)
4184                 vec_temp = PHI_RESULT (new_phi);
4185               else
4186                 vec_temp = gimple_assign_lhs (new_phi);
4187               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4188                             bitsize_zero_node);
4189               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4190               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4191               gimple_assign_set_lhs (epilog_stmt, new_temp);
4192               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4193
4194               /* In SLP we don't need to apply reduction operation, so we just
4195                  collect s' values in SCALAR_RESULTS.  */
4196               if (slp_reduc)
4197                 scalar_results.safe_push (new_temp);
4198
4199               for (bit_offset = element_bitsize;
4200                    bit_offset < vec_size_in_bits;
4201                    bit_offset += element_bitsize)
4202                 {
4203                   tree bitpos = bitsize_int (bit_offset);
4204                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4205                                      bitsize, bitpos);
4206
4207                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4208                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4209                   gimple_assign_set_lhs (epilog_stmt, new_name);
4210                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4211
4212                   if (slp_reduc)
4213                     {
4214                       /* In SLP we don't need to apply reduction operation, so
4215                          we just collect s' values in SCALAR_RESULTS.  */
4216                       new_temp = new_name;
4217                       scalar_results.safe_push (new_name);
4218                     }
4219                   else
4220                     {
4221                       epilog_stmt = gimple_build_assign_with_ops (code,
4222                                           new_scalar_dest, new_name, new_temp);
4223                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4224                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4225                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4226                     }
4227                 }
4228             }
4229
4230           /* The only case where we need to reduce scalar results in SLP, is
4231              unrolling.  If the size of SCALAR_RESULTS is greater than
4232              GROUP_SIZE, we reduce them combining elements modulo
4233              GROUP_SIZE.  */
4234           if (slp_reduc)
4235             {
4236               tree res, first_res, new_res;
4237               gimple new_stmt;
4238
4239               /* Reduce multiple scalar results in case of SLP unrolling.  */
4240               for (j = group_size; scalar_results.iterate (j, &res);
4241                    j++)
4242                 {
4243                   first_res = scalar_results[j % group_size];
4244                   new_stmt = gimple_build_assign_with_ops (code,
4245                                               new_scalar_dest, first_res, res);
4246                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4247                   gimple_assign_set_lhs (new_stmt, new_res);
4248                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4249                   scalar_results[j % group_size] = new_res;
4250                 }
4251             }
4252           else
4253             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4254             scalar_results.safe_push (new_temp);
4255
4256           extract_scalar_result = false;
4257         }
4258     }
4259
4260   /* 2.4  Extract the final scalar result.  Create:
4261           s_out3 = extract_field <v_out2, bitpos>  */
4262
4263   if (extract_scalar_result)
4264     {
4265       tree rhs;
4266
4267       if (dump_enabled_p ())
4268         dump_printf_loc (MSG_NOTE, vect_location,
4269                          "extract scalar result\n");
4270
4271       if (BYTES_BIG_ENDIAN)
4272         bitpos = size_binop (MULT_EXPR,
4273                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4274                              TYPE_SIZE (scalar_type));
4275       else
4276         bitpos = bitsize_zero_node;
4277
4278       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4279       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4280       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4281       gimple_assign_set_lhs (epilog_stmt, new_temp);
4282       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4283       scalar_results.safe_push (new_temp);
4284     }
4285
4286 vect_finalize_reduction:
4287
4288   if (double_reduc)
4289     loop = loop->inner;
4290
4291   /* 2.5 Adjust the final result by the initial value of the reduction
4292          variable. (When such adjustment is not needed, then
4293          'adjustment_def' is zero).  For example, if code is PLUS we create:
4294          new_temp = loop_exit_def + adjustment_def  */
4295
4296   if (adjustment_def)
4297     {
4298       gcc_assert (!slp_reduc);
4299       if (nested_in_vect_loop)
4300         {
4301           new_phi = new_phis[0];
4302           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4303           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4304           new_dest = vect_create_destination_var (scalar_dest, vectype);
4305         }
4306       else
4307         {
4308           new_temp = scalar_results[0];
4309           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4310           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4311           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4312         }
4313
4314       epilog_stmt = gimple_build_assign (new_dest, expr);
4315       new_temp = make_ssa_name (new_dest, epilog_stmt);
4316       gimple_assign_set_lhs (epilog_stmt, new_temp);
4317       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4318       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4319       if (nested_in_vect_loop)
4320         {
4321           set_vinfo_for_stmt (epilog_stmt,
4322                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4323                                                  NULL));
4324           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4325                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4326
4327           if (!double_reduc)
4328             scalar_results.quick_push (new_temp);
4329           else
4330             scalar_results[0] = new_temp;
4331         }
4332       else
4333         scalar_results[0] = new_temp;
4334
4335       new_phis[0] = epilog_stmt;
4336     }
4337
4338   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4339           phis with new adjusted scalar results, i.e., replace use <s_out0>
4340           with use <s_out4>.
4341
4342      Transform:
4343         loop_exit:
4344           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4345           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4346           v_out2 = reduce <v_out1>
4347           s_out3 = extract_field <v_out2, 0>
4348           s_out4 = adjust_result <s_out3>
4349           use <s_out0>
4350           use <s_out0>
4351
4352      into:
4353
4354         loop_exit:
4355           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4356           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4357           v_out2 = reduce <v_out1>
4358           s_out3 = extract_field <v_out2, 0>
4359           s_out4 = adjust_result <s_out3>
4360           use <s_out4>
4361           use <s_out4> */
4362
4363
4364   /* In SLP reduction chain we reduce vector results into one vector if
4365      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4366      the last stmt in the reduction chain, since we are looking for the loop
4367      exit phi node.  */
4368   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4369     {
4370       scalar_dest = gimple_assign_lhs (
4371                         SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4372       group_size = 1;
4373     }
4374
4375   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4376      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4377      need to match SCALAR_RESULTS with corresponding statements.  The first
4378      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4379      the first vector stmt, etc.
4380      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4381   if (group_size > new_phis.length ())
4382     {
4383       ratio = group_size / new_phis.length ();
4384       gcc_assert (!(group_size % new_phis.length ()));
4385     }
4386   else
4387     ratio = 1;
4388
4389   for (k = 0; k < group_size; k++)
4390     {
4391       if (k % ratio == 0)
4392         {
4393           epilog_stmt = new_phis[k / ratio];
4394           reduction_phi = reduction_phis[k / ratio];
4395           if (double_reduc)
4396             inner_phi = inner_phis[k / ratio];
4397         }
4398
4399       if (slp_reduc)
4400         {
4401           gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4402
4403           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4404           /* SLP statements can't participate in patterns.  */
4405           gcc_assert (!orig_stmt);
4406           scalar_dest = gimple_assign_lhs (current_stmt);
4407         }
4408
4409       phis.create (3);
4410       /* Find the loop-closed-use at the loop exit of the original scalar
4411          result.  (The reduction result is expected to have two immediate uses -
4412          one at the latch block, and one at the loop exit).  */
4413       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4414         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4415             && !is_gimple_debug (USE_STMT (use_p)))
4416           phis.safe_push (USE_STMT (use_p));
4417
4418       /* While we expect to have found an exit_phi because of loop-closed-ssa
4419          form we can end up without one if the scalar cycle is dead.  */
4420
4421       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4422         {
4423           if (outer_loop)
4424             {
4425               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4426               gimple vect_phi;
4427
4428               /* FORNOW. Currently not supporting the case that an inner-loop
4429                  reduction is not used in the outer-loop (but only outside the
4430                  outer-loop), unless it is double reduction.  */
4431               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4432                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4433                           || double_reduc);
4434
4435               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4436               if (!double_reduc
4437                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4438                       != vect_double_reduction_def)
4439                 continue;
4440
4441               /* Handle double reduction:
4442
4443                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4444                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4445                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4446                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4447
4448                  At that point the regular reduction (stmt2 and stmt3) is
4449                  already vectorized, as well as the exit phi node, stmt4.
4450                  Here we vectorize the phi node of double reduction, stmt1, and
4451                  update all relevant statements.  */
4452
4453               /* Go through all the uses of s2 to find double reduction phi
4454                  node, i.e., stmt1 above.  */
4455               orig_name = PHI_RESULT (exit_phi);
4456               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4457                 {
4458                   stmt_vec_info use_stmt_vinfo;
4459                   stmt_vec_info new_phi_vinfo;
4460                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4461                   basic_block bb = gimple_bb (use_stmt);
4462                   gimple use;
4463
4464                   /* Check that USE_STMT is really double reduction phi
4465                      node.  */
4466                   if (gimple_code (use_stmt) != GIMPLE_PHI
4467                       || gimple_phi_num_args (use_stmt) != 2
4468                       || bb->loop_father != outer_loop)
4469                     continue;
4470                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4471                   if (!use_stmt_vinfo
4472                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4473                           != vect_double_reduction_def)
4474                     continue;
4475
4476                   /* Create vector phi node for double reduction:
4477                      vs1 = phi <vs0, vs2>
4478                      vs1 was created previously in this function by a call to
4479                        vect_get_vec_def_for_operand and is stored in
4480                        vec_initial_def;
4481                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4482                      vs0 is created here.  */
4483
4484                   /* Create vector phi node.  */
4485                   vect_phi = create_phi_node (vec_initial_def, bb);
4486                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4487                                     loop_vec_info_for_loop (outer_loop), NULL);
4488                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4489
4490                   /* Create vs0 - initial def of the double reduction phi.  */
4491                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4492                                              loop_preheader_edge (outer_loop));
4493                   init_def = get_initial_def_for_reduction (stmt,
4494                                                           preheader_arg, NULL);
4495                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4496                                                     vectype, NULL);
4497
4498                   /* Update phi node arguments with vs0 and vs2.  */
4499                   add_phi_arg (vect_phi, vect_phi_init,
4500                                loop_preheader_edge (outer_loop),
4501                                UNKNOWN_LOCATION);
4502                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4503                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4504                   if (dump_enabled_p ())
4505                     {
4506                       dump_printf_loc (MSG_NOTE, vect_location,
4507                                        "created double reduction phi node: ");
4508                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4509                       dump_printf (MSG_NOTE, "\n");
4510                     }
4511
4512                   vect_phi_res = PHI_RESULT (vect_phi);
4513
4514                   /* Replace the use, i.e., set the correct vs1 in the regular
4515                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4516                      loop is redundant.  */
4517                   use = reduction_phi;
4518                   for (j = 0; j < ncopies; j++)
4519                     {
4520                       edge pr_edge = loop_preheader_edge (loop);
4521                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4522                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4523                     }
4524                 }
4525             }
4526         }
4527
4528       phis.release ();
4529       if (nested_in_vect_loop)
4530         {
4531           if (double_reduc)
4532             loop = outer_loop;
4533           else
4534             continue;
4535         }
4536
4537       phis.create (3);
4538       /* Find the loop-closed-use at the loop exit of the original scalar
4539          result.  (The reduction result is expected to have two immediate uses,
4540          one at the latch block, and one at the loop exit).  For double
4541          reductions we are looking for exit phis of the outer loop.  */
4542       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4543         {
4544           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4545             {
4546               if (!is_gimple_debug (USE_STMT (use_p)))
4547                 phis.safe_push (USE_STMT (use_p));
4548             }
4549           else
4550             {
4551               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4552                 {
4553                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4554
4555                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4556                     {
4557                       if (!flow_bb_inside_loop_p (loop,
4558                                              gimple_bb (USE_STMT (phi_use_p)))
4559                           && !is_gimple_debug (USE_STMT (phi_use_p)))
4560                         phis.safe_push (USE_STMT (phi_use_p));
4561                     }
4562                 }
4563             }
4564         }
4565
4566       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4567         {
4568           /* Replace the uses:  */
4569           orig_name = PHI_RESULT (exit_phi);
4570           scalar_result = scalar_results[k];
4571           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4572             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4573               SET_USE (use_p, scalar_result);
4574         }
4575
4576       phis.release ();
4577     }
4578
4579   scalar_results.release ();
4580   inner_phis.release ();
4581   new_phis.release ();
4582 }
4583
4584
4585 /* Function vectorizable_reduction.
4586
4587    Check if STMT performs a reduction operation that can be vectorized.
4588    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4589    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4590    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4591
4592    This function also handles reduction idioms (patterns) that have been
4593    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4594    of this form:
4595      X = pattern_expr (arg0, arg1, ..., X)
4596    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
4597    sequence that had been detected and replaced by the pattern-stmt (STMT).
4598
4599    In some cases of reduction patterns, the type of the reduction variable X is
4600    different than the type of the other arguments of STMT.
4601    In such cases, the vectype that is used when transforming STMT into a vector
4602    stmt is different than the vectype that is used to determine the
4603    vectorization factor, because it consists of a different number of elements
4604    than the actual number of elements that are being operated upon in parallel.
4605
4606    For example, consider an accumulation of shorts into an int accumulator.
4607    On some targets it's possible to vectorize this pattern operating on 8
4608    shorts at a time (hence, the vectype for purposes of determining the
4609    vectorization factor should be V8HI); on the other hand, the vectype that
4610    is used to create the vector form is actually V4SI (the type of the result).
4611
4612    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4613    indicates what is the actual level of parallelism (V8HI in the example), so
4614    that the right vectorization factor would be derived.  This vectype
4615    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4616    be used to create the vectorized stmt.  The right vectype for the vectorized
4617    stmt is obtained from the type of the result X:
4618         get_vectype_for_scalar_type (TREE_TYPE (X))
4619
4620    This means that, contrary to "regular" reductions (or "regular" stmts in
4621    general), the following equation:
4622       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4623    does *NOT* necessarily hold for reduction patterns.  */
4624
4625 bool
4626 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4627                         gimple *vec_stmt, slp_tree slp_node)
4628 {
4629   tree vec_dest;
4630   tree scalar_dest;
4631   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4632   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4633   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4634   tree vectype_in = NULL_TREE;
4635   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4636   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4637   enum tree_code code, orig_code, epilog_reduc_code;
4638   enum machine_mode vec_mode;
4639   int op_type;
4640   optab optab, reduc_optab;
4641   tree new_temp = NULL_TREE;
4642   tree def;
4643   gimple def_stmt;
4644   enum vect_def_type dt;
4645   gimple new_phi = NULL;
4646   tree scalar_type;
4647   bool is_simple_use;
4648   gimple orig_stmt;
4649   stmt_vec_info orig_stmt_info;
4650   tree expr = NULL_TREE;
4651   int i;
4652   int ncopies;
4653   int epilog_copies;
4654   stmt_vec_info prev_stmt_info, prev_phi_info;
4655   bool single_defuse_cycle = false;
4656   tree reduc_def = NULL_TREE;
4657   gimple new_stmt = NULL;
4658   int j;
4659   tree ops[3];
4660   bool nested_cycle = false, found_nested_cycle_def = false;
4661   gimple reduc_def_stmt = NULL;
4662   /* The default is that the reduction variable is the last in statement.  */
4663   int reduc_index = 2;
4664   bool double_reduc = false, dummy;
4665   basic_block def_bb;
4666   struct loop * def_stmt_loop, *outer_loop = NULL;
4667   tree def_arg;
4668   gimple def_arg_stmt;
4669   vec<tree> vec_oprnds0 = vNULL;
4670   vec<tree> vec_oprnds1 = vNULL;
4671   vec<tree> vect_defs = vNULL;
4672   vec<gimple> phis = vNULL;
4673   int vec_num;
4674   tree def0, def1, tem, op0, op1 = NULL_TREE;
4675
4676   /* In case of reduction chain we switch to the first stmt in the chain, but
4677      we don't update STMT_INFO, since only the last stmt is marked as reduction
4678      and has reduction properties.  */
4679   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4680     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4681
4682   if (nested_in_vect_loop_p (loop, stmt))
4683     {
4684       outer_loop = loop;
4685       loop = loop->inner;
4686       nested_cycle = true;
4687     }
4688
4689   /* 1. Is vectorizable reduction?  */
4690   /* Not supportable if the reduction variable is used in the loop, unless
4691      it's a reduction chain.  */
4692   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4693       && !GROUP_FIRST_ELEMENT (stmt_info))
4694     return false;
4695
4696   /* Reductions that are not used even in an enclosing outer-loop,
4697      are expected to be "live" (used out of the loop).  */
4698   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4699       && !STMT_VINFO_LIVE_P (stmt_info))
4700     return false;
4701
4702   /* Make sure it was already recognized as a reduction computation.  */
4703   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4704       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4705     return false;
4706
4707   /* 2. Has this been recognized as a reduction pattern?
4708
4709      Check if STMT represents a pattern that has been recognized
4710      in earlier analysis stages.  For stmts that represent a pattern,
4711      the STMT_VINFO_RELATED_STMT field records the last stmt in
4712      the original sequence that constitutes the pattern.  */
4713
4714   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4715   if (orig_stmt)
4716     {
4717       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4718       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4719       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4720     }
4721
4722   /* 3. Check the operands of the operation.  The first operands are defined
4723         inside the loop body. The last operand is the reduction variable,
4724         which is defined by the loop-header-phi.  */
4725
4726   gcc_assert (is_gimple_assign (stmt));
4727
4728   /* Flatten RHS.  */
4729   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4730     {
4731     case GIMPLE_SINGLE_RHS:
4732       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4733       if (op_type == ternary_op)
4734         {
4735           tree rhs = gimple_assign_rhs1 (stmt);
4736           ops[0] = TREE_OPERAND (rhs, 0);
4737           ops[1] = TREE_OPERAND (rhs, 1);
4738           ops[2] = TREE_OPERAND (rhs, 2);
4739           code = TREE_CODE (rhs);
4740         }
4741       else
4742         return false;
4743       break;
4744
4745     case GIMPLE_BINARY_RHS:
4746       code = gimple_assign_rhs_code (stmt);
4747       op_type = TREE_CODE_LENGTH (code);
4748       gcc_assert (op_type == binary_op);
4749       ops[0] = gimple_assign_rhs1 (stmt);
4750       ops[1] = gimple_assign_rhs2 (stmt);
4751       break;
4752
4753     case GIMPLE_TERNARY_RHS:
4754       code = gimple_assign_rhs_code (stmt);
4755       op_type = TREE_CODE_LENGTH (code);
4756       gcc_assert (op_type == ternary_op);
4757       ops[0] = gimple_assign_rhs1 (stmt);
4758       ops[1] = gimple_assign_rhs2 (stmt);
4759       ops[2] = gimple_assign_rhs3 (stmt);
4760       break;
4761
4762     case GIMPLE_UNARY_RHS:
4763       return false;
4764
4765     default:
4766       gcc_unreachable ();
4767     }
4768
4769   if (code == COND_EXPR && slp_node)
4770     return false;
4771
4772   scalar_dest = gimple_assign_lhs (stmt);
4773   scalar_type = TREE_TYPE (scalar_dest);
4774   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4775       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4776     return false;
4777
4778   /* Do not try to vectorize bit-precision reductions.  */
4779   if ((TYPE_PRECISION (scalar_type)
4780        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4781     return false;
4782
4783   /* All uses but the last are expected to be defined in the loop.
4784      The last use is the reduction variable.  In case of nested cycle this
4785      assumption is not true: we use reduc_index to record the index of the
4786      reduction variable.  */
4787   for (i = 0; i < op_type - 1; i++)
4788     {
4789       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4790       if (i == 0 && code == COND_EXPR)
4791         continue;
4792
4793       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4794                                             &def_stmt, &def, &dt, &tem);
4795       if (!vectype_in)
4796         vectype_in = tem;
4797       gcc_assert (is_simple_use);
4798
4799       if (dt != vect_internal_def
4800           && dt != vect_external_def
4801           && dt != vect_constant_def
4802           && dt != vect_induction_def
4803           && !(dt == vect_nested_cycle && nested_cycle))
4804         return false;
4805
4806       if (dt == vect_nested_cycle)
4807         {
4808           found_nested_cycle_def = true;
4809           reduc_def_stmt = def_stmt;
4810           reduc_index = i;
4811         }
4812     }
4813
4814   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4815                                         &def_stmt, &def, &dt, &tem);
4816   if (!vectype_in)
4817     vectype_in = tem;
4818   gcc_assert (is_simple_use);
4819   if (!(dt == vect_reduction_def
4820         || dt == vect_nested_cycle
4821         || ((dt == vect_internal_def || dt == vect_external_def
4822              || dt == vect_constant_def || dt == vect_induction_def)
4823             && nested_cycle && found_nested_cycle_def)))
4824     {
4825       /* For pattern recognized stmts, orig_stmt might be a reduction,
4826          but some helper statements for the pattern might not, or
4827          might be COND_EXPRs with reduction uses in the condition.  */
4828       gcc_assert (orig_stmt);
4829       return false;
4830     }
4831   if (!found_nested_cycle_def)
4832     reduc_def_stmt = def_stmt;
4833
4834   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4835   if (orig_stmt)
4836     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4837                                                        reduc_def_stmt,
4838                                                        !nested_cycle,
4839                                                        &dummy));
4840   else
4841     {
4842       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4843                                              !nested_cycle, &dummy);
4844       /* We changed STMT to be the first stmt in reduction chain, hence we
4845          check that in this case the first element in the chain is STMT.  */
4846       gcc_assert (stmt == tmp
4847                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4848     }
4849
4850   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4851     return false;
4852
4853   if (slp_node || PURE_SLP_STMT (stmt_info))
4854     ncopies = 1;
4855   else
4856     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4857                / TYPE_VECTOR_SUBPARTS (vectype_in));
4858
4859   gcc_assert (ncopies >= 1);
4860
4861   vec_mode = TYPE_MODE (vectype_in);
4862
4863   if (code == COND_EXPR)
4864     {
4865       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4866         {
4867           if (dump_enabled_p ())
4868             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4869                              "unsupported condition in reduction\n");
4870
4871             return false;
4872         }
4873     }
4874   else
4875     {
4876       /* 4. Supportable by target?  */
4877
4878       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4879           || code == LROTATE_EXPR || code == RROTATE_EXPR)
4880         {
4881           /* Shifts and rotates are only supported by vectorizable_shifts,
4882              not vectorizable_reduction.  */
4883           if (dump_enabled_p ())
4884             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4885                              "unsupported shift or rotation.\n");
4886           return false;
4887         }
4888
4889       /* 4.1. check support for the operation in the loop  */
4890       optab = optab_for_tree_code (code, vectype_in, optab_default);
4891       if (!optab)
4892         {
4893           if (dump_enabled_p ())
4894             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4895                              "no optab.\n");
4896
4897           return false;
4898         }
4899
4900       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4901         {
4902           if (dump_enabled_p ())
4903             dump_printf (MSG_NOTE, "op not supported by target.\n");
4904
4905           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4906               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4907                   < vect_min_worthwhile_factor (code))
4908             return false;
4909
4910           if (dump_enabled_p ())
4911             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
4912         }
4913
4914       /* Worthwhile without SIMD support?  */
4915       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4916           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4917              < vect_min_worthwhile_factor (code))
4918         {
4919           if (dump_enabled_p ())
4920             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921                              "not worthwhile without SIMD support.\n");
4922
4923           return false;
4924         }
4925     }
4926
4927   /* 4.2. Check support for the epilog operation.
4928
4929           If STMT represents a reduction pattern, then the type of the
4930           reduction variable may be different than the type of the rest
4931           of the arguments.  For example, consider the case of accumulation
4932           of shorts into an int accumulator; The original code:
4933                         S1: int_a = (int) short_a;
4934           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4935
4936           was replaced with:
4937                         STMT: int_acc = widen_sum <short_a, int_acc>
4938
4939           This means that:
4940           1. The tree-code that is used to create the vector operation in the
4941              epilog code (that reduces the partial results) is not the
4942              tree-code of STMT, but is rather the tree-code of the original
4943              stmt from the pattern that STMT is replacing.  I.e, in the example
4944              above we want to use 'widen_sum' in the loop, but 'plus' in the
4945              epilog.
4946           2. The type (mode) we use to check available target support
4947              for the vector operation to be created in the *epilog*, is
4948              determined by the type of the reduction variable (in the example
4949              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4950              However the type (mode) we use to check available target support
4951              for the vector operation to be created *inside the loop*, is
4952              determined by the type of the other arguments to STMT (in the
4953              example we'd check this: optab_handler (widen_sum_optab,
4954              vect_short_mode)).
4955
4956           This is contrary to "regular" reductions, in which the types of all
4957           the arguments are the same as the type of the reduction variable.
4958           For "regular" reductions we can therefore use the same vector type
4959           (and also the same tree-code) when generating the epilog code and
4960           when generating the code inside the loop.  */
4961
4962   if (orig_stmt)
4963     {
4964       /* This is a reduction pattern: get the vectype from the type of the
4965          reduction variable, and get the tree-code from orig_stmt.  */
4966       orig_code = gimple_assign_rhs_code (orig_stmt);
4967       gcc_assert (vectype_out);
4968       vec_mode = TYPE_MODE (vectype_out);
4969     }
4970   else
4971     {
4972       /* Regular reduction: use the same vectype and tree-code as used for
4973          the vector code inside the loop can be used for the epilog code. */
4974       orig_code = code;
4975     }
4976
4977   if (nested_cycle)
4978     {
4979       def_bb = gimple_bb (reduc_def_stmt);
4980       def_stmt_loop = def_bb->loop_father;
4981       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4982                                        loop_preheader_edge (def_stmt_loop));
4983       if (TREE_CODE (def_arg) == SSA_NAME
4984           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4985           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4986           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4987           && vinfo_for_stmt (def_arg_stmt)
4988           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4989               == vect_double_reduction_def)
4990         double_reduc = true;
4991     }
4992
4993   epilog_reduc_code = ERROR_MARK;
4994   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4995     {
4996       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4997                                          optab_default);
4998       if (!reduc_optab)
4999         {
5000           if (dump_enabled_p ())
5001             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5002                              "no optab for reduction.\n");
5003
5004           epilog_reduc_code = ERROR_MARK;
5005         }
5006
5007       if (reduc_optab
5008           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5009         {
5010           if (dump_enabled_p ())
5011             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5012                              "reduc op not supported by target.\n");
5013
5014           epilog_reduc_code = ERROR_MARK;
5015         }
5016     }
5017   else
5018     {
5019       if (!nested_cycle || double_reduc)
5020         {
5021           if (dump_enabled_p ())
5022             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5023                              "no reduc code for scalar code.\n");
5024
5025           return false;
5026         }
5027     }
5028
5029   if (double_reduc && ncopies > 1)
5030     {
5031       if (dump_enabled_p ())
5032         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5033                          "multiple types in double reduction\n");
5034
5035       return false;
5036     }
5037
5038   /* In case of widenning multiplication by a constant, we update the type
5039      of the constant to be the type of the other operand.  We check that the
5040      constant fits the type in the pattern recognition pass.  */
5041   if (code == DOT_PROD_EXPR
5042       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5043     {
5044       if (TREE_CODE (ops[0]) == INTEGER_CST)
5045         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5046       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5047         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5048       else
5049         {
5050           if (dump_enabled_p ())
5051             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5052                              "invalid types in dot-prod\n");
5053
5054           return false;
5055         }
5056     }
5057
5058   if (!vec_stmt) /* transformation not required.  */
5059     {
5060       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5061         return false;
5062       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5063       return true;
5064     }
5065
5066   /** Transform.  **/
5067
5068   if (dump_enabled_p ())
5069     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5070
5071   /* FORNOW: Multiple types are not supported for condition.  */
5072   if (code == COND_EXPR)
5073     gcc_assert (ncopies == 1);
5074
5075   /* Create the destination vector  */
5076   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5077
5078   /* In case the vectorization factor (VF) is bigger than the number
5079      of elements that we can fit in a vectype (nunits), we have to generate
5080      more than one vector stmt - i.e - we need to "unroll" the
5081      vector stmt by a factor VF/nunits.  For more details see documentation
5082      in vectorizable_operation.  */
5083
5084   /* If the reduction is used in an outer loop we need to generate
5085      VF intermediate results, like so (e.g. for ncopies=2):
5086         r0 = phi (init, r0)
5087         r1 = phi (init, r1)
5088         r0 = x0 + r0;
5089         r1 = x1 + r1;
5090     (i.e. we generate VF results in 2 registers).
5091     In this case we have a separate def-use cycle for each copy, and therefore
5092     for each copy we get the vector def for the reduction variable from the
5093     respective phi node created for this copy.
5094
5095     Otherwise (the reduction is unused in the loop nest), we can combine
5096     together intermediate results, like so (e.g. for ncopies=2):
5097         r = phi (init, r)
5098         r = x0 + r;
5099         r = x1 + r;
5100    (i.e. we generate VF/2 results in a single register).
5101    In this case for each copy we get the vector def for the reduction variable
5102    from the vectorized reduction operation generated in the previous iteration.
5103   */
5104
5105   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5106     {
5107       single_defuse_cycle = true;
5108       epilog_copies = 1;
5109     }
5110   else
5111     epilog_copies = ncopies;
5112
5113   prev_stmt_info = NULL;
5114   prev_phi_info = NULL;
5115   if (slp_node)
5116     {
5117       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5118       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5119                   == TYPE_VECTOR_SUBPARTS (vectype_in));
5120     }
5121   else
5122     {
5123       vec_num = 1;
5124       vec_oprnds0.create (1);
5125       if (op_type == ternary_op)
5126         vec_oprnds1.create (1);
5127     }
5128
5129   phis.create (vec_num);
5130   vect_defs.create (vec_num);
5131   if (!slp_node)
5132     vect_defs.quick_push (NULL_TREE);
5133
5134   for (j = 0; j < ncopies; j++)
5135     {
5136       if (j == 0 || !single_defuse_cycle)
5137         {
5138           for (i = 0; i < vec_num; i++)
5139             {
5140               /* Create the reduction-phi that defines the reduction
5141                  operand.  */
5142               new_phi = create_phi_node (vec_dest, loop->header);
5143               set_vinfo_for_stmt (new_phi,
5144                                   new_stmt_vec_info (new_phi, loop_vinfo,
5145                                                      NULL));
5146                if (j == 0 || slp_node)
5147                  phis.quick_push (new_phi);
5148             }
5149         }
5150
5151       if (code == COND_EXPR)
5152         {
5153           gcc_assert (!slp_node);
5154           vectorizable_condition (stmt, gsi, vec_stmt,
5155                                   PHI_RESULT (phis[0]),
5156                                   reduc_index, NULL);
5157           /* Multiple types are not supported for condition.  */
5158           break;
5159         }
5160
5161       /* Handle uses.  */
5162       if (j == 0)
5163         {
5164           op0 = ops[!reduc_index];
5165           if (op_type == ternary_op)
5166             {
5167               if (reduc_index == 0)
5168                 op1 = ops[2];
5169               else
5170                 op1 = ops[1];
5171             }
5172
5173           if (slp_node)
5174             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5175                                slp_node, -1);
5176           else
5177             {
5178               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5179                                                             stmt, NULL);
5180               vec_oprnds0.quick_push (loop_vec_def0);
5181               if (op_type == ternary_op)
5182                {
5183                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5184                                                                NULL);
5185                  vec_oprnds1.quick_push (loop_vec_def1);
5186                }
5187             }
5188         }
5189       else
5190         {
5191           if (!slp_node)
5192             {
5193               enum vect_def_type dt;
5194               gimple dummy_stmt;
5195               tree dummy;
5196
5197               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5198                                   &dummy_stmt, &dummy, &dt);
5199               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5200                                                               loop_vec_def0);
5201               vec_oprnds0[0] = loop_vec_def0;
5202               if (op_type == ternary_op)
5203                 {
5204                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5205                                       &dummy, &dt);
5206                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5207                                                                 loop_vec_def1);
5208                   vec_oprnds1[0] = loop_vec_def1;
5209                 }
5210             }
5211
5212           if (single_defuse_cycle)
5213             reduc_def = gimple_assign_lhs (new_stmt);
5214
5215           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5216         }
5217
5218       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5219         {
5220           if (slp_node)
5221             reduc_def = PHI_RESULT (phis[i]);
5222           else
5223             {
5224               if (!single_defuse_cycle || j == 0)
5225                 reduc_def = PHI_RESULT (new_phi);
5226             }
5227
5228           def1 = ((op_type == ternary_op)
5229                   ? vec_oprnds1[i] : NULL);
5230           if (op_type == binary_op)
5231             {
5232               if (reduc_index == 0)
5233                 expr = build2 (code, vectype_out, reduc_def, def0);
5234               else
5235                 expr = build2 (code, vectype_out, def0, reduc_def);
5236             }
5237           else
5238             {
5239               if (reduc_index == 0)
5240                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5241               else
5242                 {
5243                   if (reduc_index == 1)
5244                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5245                   else
5246                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5247                 }
5248             }
5249
5250           new_stmt = gimple_build_assign (vec_dest, expr);
5251           new_temp = make_ssa_name (vec_dest, new_stmt);
5252           gimple_assign_set_lhs (new_stmt, new_temp);
5253           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5254
5255           if (slp_node)
5256             {
5257               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5258               vect_defs.quick_push (new_temp);
5259             }
5260           else
5261             vect_defs[0] = new_temp;
5262         }
5263
5264       if (slp_node)
5265         continue;
5266
5267       if (j == 0)
5268         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5269       else
5270         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5271
5272       prev_stmt_info = vinfo_for_stmt (new_stmt);
5273       prev_phi_info = vinfo_for_stmt (new_phi);
5274     }
5275
5276   /* Finalize the reduction-phi (set its arguments) and create the
5277      epilog reduction code.  */
5278   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5279     {
5280       new_temp = gimple_assign_lhs (*vec_stmt);
5281       vect_defs[0] = new_temp;
5282     }
5283
5284   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5285                                     epilog_reduc_code, phis, reduc_index,
5286                                     double_reduc, slp_node);
5287
5288   phis.release ();
5289   vect_defs.release ();
5290   vec_oprnds0.release ();
5291   vec_oprnds1.release ();
5292
5293   return true;
5294 }
5295
5296 /* Function vect_min_worthwhile_factor.
5297
5298    For a loop where we could vectorize the operation indicated by CODE,
5299    return the minimum vectorization factor that makes it worthwhile
5300    to use generic vectors.  */
5301 int
5302 vect_min_worthwhile_factor (enum tree_code code)
5303 {
5304   switch (code)
5305     {
5306     case PLUS_EXPR:
5307     case MINUS_EXPR:
5308     case NEGATE_EXPR:
5309       return 4;
5310
5311     case BIT_AND_EXPR:
5312     case BIT_IOR_EXPR:
5313     case BIT_XOR_EXPR:
5314     case BIT_NOT_EXPR:
5315       return 2;
5316
5317     default:
5318       return INT_MAX;
5319     }
5320 }
5321
5322
5323 /* Function vectorizable_induction
5324
5325    Check if PHI performs an induction computation that can be vectorized.
5326    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5327    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5328    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5329
5330 bool
5331 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5332                         gimple *vec_stmt)
5333 {
5334   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5335   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5336   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5337   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5338   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5339   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5340   tree vec_def;
5341
5342   gcc_assert (ncopies >= 1);
5343   /* FORNOW. These restrictions should be relaxed.  */
5344   if (nested_in_vect_loop_p (loop, phi))
5345     {
5346       imm_use_iterator imm_iter;
5347       use_operand_p use_p;
5348       gimple exit_phi;
5349       edge latch_e;
5350       tree loop_arg;
5351
5352       if (ncopies > 1)
5353         {
5354           if (dump_enabled_p ())
5355             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356                              "multiple types in nested loop.\n");
5357           return false;
5358         }
5359
5360       exit_phi = NULL;
5361       latch_e = loop_latch_edge (loop->inner);
5362       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5363       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5364         {
5365           if (!flow_bb_inside_loop_p (loop->inner,
5366                                       gimple_bb (USE_STMT (use_p))))
5367             {
5368               exit_phi = USE_STMT (use_p);
5369               break;
5370             }
5371         }
5372       if (exit_phi)
5373         {
5374           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5375           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5376                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5377             {
5378               if (dump_enabled_p ())
5379                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5380                                  "inner-loop induction only used outside "
5381                                  "of the outer vectorized loop.\n");
5382               return false;
5383             }
5384         }
5385     }
5386
5387   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5388     return false;
5389
5390   /* FORNOW: SLP not supported.  */
5391   if (STMT_SLP_TYPE (stmt_info))
5392     return false;
5393
5394   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5395
5396   if (gimple_code (phi) != GIMPLE_PHI)
5397     return false;
5398
5399   if (!vec_stmt) /* transformation not required.  */
5400     {
5401       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5402       if (dump_enabled_p ())
5403         dump_printf_loc (MSG_NOTE, vect_location,
5404                          "=== vectorizable_induction ===\n");
5405       vect_model_induction_cost (stmt_info, ncopies);
5406       return true;
5407     }
5408
5409   /** Transform.  **/
5410
5411   if (dump_enabled_p ())
5412     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5413
5414   vec_def = get_initial_def_for_induction (phi);
5415   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5416   return true;
5417 }
5418
5419 /* Function vectorizable_live_operation.
5420
5421    STMT computes a value that is used outside the loop.  Check if
5422    it can be supported.  */
5423
5424 bool
5425 vectorizable_live_operation (gimple stmt,
5426                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5427                              gimple *vec_stmt)
5428 {
5429   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5430   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5431   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5432   int i;
5433   int op_type;
5434   tree op;
5435   tree def;
5436   gimple def_stmt;
5437   enum vect_def_type dt;
5438   enum tree_code code;
5439   enum gimple_rhs_class rhs_class;
5440
5441   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5442
5443   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5444     return false;
5445
5446   if (!is_gimple_assign (stmt))
5447     {
5448       if (gimple_call_internal_p (stmt)
5449           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5450           && gimple_call_lhs (stmt)
5451           && loop->simduid
5452           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5453           && loop->simduid
5454              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5455         {
5456           edge e = single_exit (loop);
5457           basic_block merge_bb = e->dest;
5458           imm_use_iterator imm_iter;
5459           use_operand_p use_p;
5460           tree lhs = gimple_call_lhs (stmt);
5461
5462           FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5463             {
5464               gimple use_stmt = USE_STMT (use_p);
5465               if (gimple_code (use_stmt) == GIMPLE_PHI
5466                   || gimple_bb (use_stmt) == merge_bb)
5467                 {
5468                   if (vec_stmt)
5469                     {
5470                       tree vfm1
5471                         = build_int_cst (unsigned_type_node,
5472                                          loop_vinfo->vectorization_factor - 1);
5473                       SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5474                     }
5475                   return true;
5476                 }
5477             }
5478         }
5479
5480       return false;
5481     }
5482
5483   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5484     return false;
5485
5486   /* FORNOW. CHECKME. */
5487   if (nested_in_vect_loop_p (loop, stmt))
5488     return false;
5489
5490   code = gimple_assign_rhs_code (stmt);
5491   op_type = TREE_CODE_LENGTH (code);
5492   rhs_class = get_gimple_rhs_class (code);
5493   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5494   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5495
5496   /* FORNOW: support only if all uses are invariant.  This means
5497      that the scalar operations can remain in place, unvectorized.
5498      The original last scalar value that they compute will be used.  */
5499
5500   for (i = 0; i < op_type; i++)
5501     {
5502       if (rhs_class == GIMPLE_SINGLE_RHS)
5503         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5504       else
5505         op = gimple_op (stmt, i + 1);
5506       if (op
5507           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5508                                   &dt))
5509         {
5510           if (dump_enabled_p ())
5511             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5512                              "use not simple.\n");
5513           return false;
5514         }
5515
5516       if (dt != vect_external_def && dt != vect_constant_def)
5517         return false;
5518     }
5519
5520   /* No transformation is required for the cases we currently support.  */
5521   return true;
5522 }
5523
5524 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5525
5526 static void
5527 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5528 {
5529   ssa_op_iter op_iter;
5530   imm_use_iterator imm_iter;
5531   def_operand_p def_p;
5532   gimple ustmt;
5533
5534   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5535     {
5536       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5537         {
5538           basic_block bb;
5539
5540           if (!is_gimple_debug (ustmt))
5541             continue;
5542
5543           bb = gimple_bb (ustmt);
5544
5545           if (!flow_bb_inside_loop_p (loop, bb))
5546             {
5547               if (gimple_debug_bind_p (ustmt))
5548                 {
5549                   if (dump_enabled_p ())
5550                     dump_printf_loc (MSG_NOTE, vect_location,
5551                                      "killing debug use\n");
5552
5553                   gimple_debug_bind_reset_value (ustmt);
5554                   update_stmt (ustmt);
5555                 }
5556               else
5557                 gcc_unreachable ();
5558             }
5559         }
5560     }
5561 }
5562
5563 /* Function vect_transform_loop.
5564
5565    The analysis phase has determined that the loop is vectorizable.
5566    Vectorize the loop - created vectorized stmts to replace the scalar
5567    stmts in the loop, and update the loop exit condition.  */
5568
5569 void
5570 vect_transform_loop (loop_vec_info loop_vinfo)
5571 {
5572   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5573   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5574   int nbbs = loop->num_nodes;
5575   gimple_stmt_iterator si;
5576   int i;
5577   tree ratio = NULL;
5578   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5579   bool grouped_store;
5580   bool slp_scheduled = false;
5581   unsigned int nunits;
5582   gimple stmt, pattern_stmt;
5583   gimple_seq pattern_def_seq = NULL;
5584   gimple_stmt_iterator pattern_def_si = gsi_none ();
5585   bool transform_pattern_stmt = false;
5586   bool check_profitability = false;
5587   int th;
5588   /* Record number of iterations before we started tampering with the profile. */
5589   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5590
5591   if (dump_enabled_p ())
5592     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5593
5594   /* If profile is inprecise, we have chance to fix it up.  */
5595   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5596     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5597
5598   /* Use the more conservative vectorization threshold.  If the number
5599      of iterations is constant assume the cost check has been performed
5600      by our caller.  If the threshold makes all loops profitable that
5601      run at least the vectorization factor number of times checking
5602      is pointless, too.  */
5603   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5604          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5605   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5606   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5607       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5608     {
5609       if (dump_enabled_p ())
5610         dump_printf_loc (MSG_NOTE, vect_location,
5611                          "Profitability threshold is %d loop iterations.\n",
5612                          th);
5613       check_profitability = true;
5614     }
5615
5616   /* Version the loop first, if required, so the profitability check
5617      comes first.  */
5618
5619   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5620       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5621     {
5622       vect_loop_versioning (loop_vinfo, th, check_profitability);
5623       check_profitability = false;
5624     }
5625
5626   /* Peel the loop if there are data refs with unknown alignment.
5627      Only one data ref with unknown store is allowed.  */
5628
5629   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5630     {
5631       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5632       check_profitability = false;
5633     }
5634
5635   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5636      compile time constant), or it is a constant that doesn't divide by the
5637      vectorization factor, then an epilog loop needs to be created.
5638      We therefore duplicate the loop: the original loop will be vectorized,
5639      and will compute the first (n/VF) iterations.  The second copy of the loop
5640      will remain scalar and will compute the remaining (n%VF) iterations.
5641      (VF is the vectorization factor).  */
5642
5643   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5644        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5645            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5646        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5647     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5648                                     th, check_profitability);
5649   else
5650     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5651                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5652
5653   /* 1) Make sure the loop header has exactly two entries
5654      2) Make sure we have a preheader basic block.  */
5655
5656   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5657
5658   split_edge (loop_preheader_edge (loop));
5659
5660   /* FORNOW: the vectorizer supports only loops which body consist
5661      of one basic block (header + empty latch). When the vectorizer will
5662      support more involved loop forms, the order by which the BBs are
5663      traversed need to be reconsidered.  */
5664
5665   for (i = 0; i < nbbs; i++)
5666     {
5667       basic_block bb = bbs[i];
5668       stmt_vec_info stmt_info;
5669       gimple phi;
5670
5671       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5672         {
5673           phi = gsi_stmt (si);
5674           if (dump_enabled_p ())
5675             {
5676               dump_printf_loc (MSG_NOTE, vect_location,
5677                                "------>vectorizing phi: ");
5678               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5679               dump_printf (MSG_NOTE, "\n");
5680             }
5681           stmt_info = vinfo_for_stmt (phi);
5682           if (!stmt_info)
5683             continue;
5684
5685           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5686             vect_loop_kill_debug_uses (loop, phi);
5687
5688           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5689               && !STMT_VINFO_LIVE_P (stmt_info))
5690             continue;
5691
5692           if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5693                 != (unsigned HOST_WIDE_INT) vectorization_factor)
5694               && dump_enabled_p ())
5695             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
5696
5697           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5698             {
5699               if (dump_enabled_p ())
5700                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
5701               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5702             }
5703         }
5704
5705       pattern_stmt = NULL;
5706       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5707         {
5708           bool is_store;
5709
5710           if (transform_pattern_stmt)
5711             stmt = pattern_stmt;
5712           else
5713             {
5714               stmt = gsi_stmt (si);
5715               /* During vectorization remove existing clobber stmts.  */
5716               if (gimple_clobber_p (stmt))
5717                 {
5718                   unlink_stmt_vdef (stmt);
5719                   gsi_remove (&si, true);
5720                   release_defs (stmt);
5721                   continue;
5722                 }
5723             }
5724
5725           if (dump_enabled_p ())
5726             {
5727               dump_printf_loc (MSG_NOTE, vect_location,
5728                                "------>vectorizing statement: ");
5729               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5730               dump_printf (MSG_NOTE, "\n");
5731             }
5732
5733           stmt_info = vinfo_for_stmt (stmt);
5734
5735           /* vector stmts created in the outer-loop during vectorization of
5736              stmts in an inner-loop may not have a stmt_info, and do not
5737              need to be vectorized.  */
5738           if (!stmt_info)
5739             {
5740               gsi_next (&si);
5741               continue;
5742             }
5743
5744           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5745             vect_loop_kill_debug_uses (loop, stmt);
5746
5747           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5748               && !STMT_VINFO_LIVE_P (stmt_info))
5749             {
5750               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5751                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5752                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5753                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5754                 {
5755                   stmt = pattern_stmt;
5756                   stmt_info = vinfo_for_stmt (stmt);
5757                 }
5758               else
5759                 {
5760                   gsi_next (&si);
5761                   continue;
5762                 }
5763             }
5764           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5765                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5766                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5767                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5768             transform_pattern_stmt = true;
5769
5770           /* If pattern statement has def stmts, vectorize them too.  */
5771           if (is_pattern_stmt_p (stmt_info))
5772             {
5773               if (pattern_def_seq == NULL)
5774                 {
5775                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5776                   pattern_def_si = gsi_start (pattern_def_seq);
5777                 }
5778               else if (!gsi_end_p (pattern_def_si))
5779                 gsi_next (&pattern_def_si);
5780               if (pattern_def_seq != NULL)
5781                 {
5782                   gimple pattern_def_stmt = NULL;
5783                   stmt_vec_info pattern_def_stmt_info = NULL;
5784
5785                   while (!gsi_end_p (pattern_def_si))
5786                     {
5787                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5788                       pattern_def_stmt_info
5789                         = vinfo_for_stmt (pattern_def_stmt);
5790                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5791                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5792                         break;
5793                       gsi_next (&pattern_def_si);
5794                     }
5795
5796                   if (!gsi_end_p (pattern_def_si))
5797                     {
5798                       if (dump_enabled_p ())
5799                         {
5800                           dump_printf_loc (MSG_NOTE, vect_location,
5801                                            "==> vectorizing pattern def "
5802                                            "stmt: ");
5803                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5804                                             pattern_def_stmt, 0);
5805                           dump_printf (MSG_NOTE, "\n");
5806                         }
5807
5808                       stmt = pattern_def_stmt;
5809                       stmt_info = pattern_def_stmt_info;
5810                     }
5811                   else
5812                     {
5813                       pattern_def_si = gsi_none ();
5814                       transform_pattern_stmt = false;
5815                     }
5816                 }
5817               else
5818                 transform_pattern_stmt = false;
5819             }
5820
5821           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5822           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5823                                                STMT_VINFO_VECTYPE (stmt_info));
5824           if (!STMT_SLP_TYPE (stmt_info)
5825               && nunits != (unsigned int) vectorization_factor
5826               && dump_enabled_p ())
5827             /* For SLP VF is set according to unrolling factor, and not to
5828                vector size, hence for SLP this print is not valid.  */
5829             dump_printf_loc (MSG_NOTE, vect_location,
5830                              "multiple-types.\n");
5831
5832           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5833              reached.  */
5834           if (STMT_SLP_TYPE (stmt_info))
5835             {
5836               if (!slp_scheduled)
5837                 {
5838                   slp_scheduled = true;
5839
5840                   if (dump_enabled_p ())
5841                     dump_printf_loc (MSG_NOTE, vect_location,
5842                                      "=== scheduling SLP instances ===\n");
5843
5844                   vect_schedule_slp (loop_vinfo, NULL);
5845                 }
5846
5847               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5848               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5849                 {
5850                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5851                     {
5852                       pattern_def_seq = NULL;
5853                       gsi_next (&si);
5854                     }
5855                   continue;
5856                 }
5857             }
5858
5859           /* -------- vectorize statement ------------ */
5860           if (dump_enabled_p ())
5861             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
5862
5863           grouped_store = false;
5864           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5865           if (is_store)
5866             {
5867               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5868                 {
5869                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5870                      interleaving chain was completed - free all the stores in
5871                      the chain.  */
5872                   gsi_next (&si);
5873                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5874                   continue;
5875                 }
5876               else
5877                 {
5878                   /* Free the attached stmt_vec_info and remove the stmt.  */
5879                   gimple store = gsi_stmt (si);
5880                   free_stmt_vec_info (store);
5881                   unlink_stmt_vdef (store);
5882                   gsi_remove (&si, true);
5883                   release_defs (store);
5884                   continue;
5885                 }
5886             }
5887
5888           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5889             {
5890               pattern_def_seq = NULL;
5891               gsi_next (&si);
5892             }
5893         }                       /* stmts in BB */
5894     }                           /* BBs in loop */
5895
5896   slpeel_make_loop_iterate_ntimes (loop, ratio);
5897
5898   /* Reduce loop iterations by the vectorization factor.  */
5899   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
5900                       expected_iterations / vectorization_factor);
5901   loop->nb_iterations_upper_bound
5902     = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5903                                             FLOOR_DIV_EXPR);
5904   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5905       && loop->nb_iterations_upper_bound != double_int_zero)
5906     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
5907   if (loop->any_estimate)
5908     {
5909       loop->nb_iterations_estimate
5910         = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5911                                              FLOOR_DIV_EXPR);
5912        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5913            && loop->nb_iterations_estimate != double_int_zero)
5914          loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5915     }
5916
5917   if (dump_enabled_p ())
5918     {
5919       dump_printf_loc (MSG_NOTE, vect_location,
5920                        "LOOP VECTORIZED\n");
5921       if (loop->inner)
5922         dump_printf_loc (MSG_NOTE, vect_location,
5923                          "OUTER LOOP VECTORIZED\n");
5924       dump_printf (MSG_NOTE, "\n");
5925     }
5926 }