gcc/tree-vect-loop.c
1 /* Loop Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "optabs-tree.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "cfganal.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "cfgloop.h"
45 #include "params.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-vectorizer.h"
48 #include "gimple-fold.h"
49 #include "cgraph.h"
51 /* Loop Vectorization Pass.
53 This pass tries to vectorize loops.
55 For example, the vectorizer transforms the following simple loop:
57 short a[N]; short b[N]; short c[N]; int i;
59 for (i=0; i<N; i++){
60 a[i] = b[i] + c[i];
 63 as if it were manually vectorized by rewriting the source code into:
65 typedef int __attribute__((mode(V8HI))) v8hi;
66 short a[N]; short b[N]; short c[N]; int i;
67 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
68 v8hi va, vb, vc;
70 for (i=0; i<N/8; i++){
71 vb = pb[i];
72 vc = pc[i];
73 va = vb + vc;
74 pa[i] = va;
77 The main entry to this pass is vectorize_loops(), in which
78 the vectorizer applies a set of analyses on a given set of loops,
79 followed by the actual vectorization transformation for the loops that
80 had successfully passed the analysis phase.
81 Throughout this pass we make a distinction between two types of
82 data: scalars (which are represented by SSA_NAMES), and memory references
83 ("data-refs"). These two types of data require different handling both
84 during analysis and transformation. The types of data-refs that the
 85 vectorizer currently supports are ARRAY_REFs whose base is an array DECL
86 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
87 accesses are required to have a simple (consecutive) access pattern.
89 Analysis phase:
90 ===============
91 The driver for the analysis phase is vect_analyze_loop().
92 It applies a set of analyses, some of which rely on the scalar evolution
93 analyzer (scev) developed by Sebastian Pop.
95 During the analysis phase the vectorizer records some information
96 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
97 loop, as well as general information about the loop as a whole, which is
98 recorded in a "loop_vec_info" struct attached to each loop.
100 Transformation phase:
101 =====================
102 The loop transformation phase scans all the stmts in the loop, and
103 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
104 the loop that needs to be vectorized. It inserts the vector code sequence
105 just before the scalar stmt S, and records a pointer to the vector code
106 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
107 attached to S). This pointer will be used for the vectorization of following
108 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
109 otherwise, we rely on dead code elimination for removing it.
111 For example, say stmt S1 was vectorized into stmt VS1:
113 VS1: vb = px[i];
114 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
115 S2: a = b;
117 To vectorize stmt S2, the vectorizer first finds the stmt that defines
118 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
119 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
120 resulting sequence would be:
122 VS1: vb = px[i];
123 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
124 VS2: va = vb;
125 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 127 Operands that are not SSA_NAMEs are data-refs that appear in
128 load/store operations (like 'x[i]' in S1), and are handled differently.
130 Target modeling:
131 =================
 132 Currently the only target-specific information that is used is the
 133 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 134 Targets that can support different vector sizes will, for now, need
 135 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
136 flexibility will be added in the future.
 138 Since we only vectorize operations whose vector form can be
139 expressed using existing tree codes, to verify that an operation is
140 supported, the vectorizer checks the relevant optab at the relevant
 141 machine_mode (e.g., optab_handler (add_optab, V8HImode)). If
142 the value found is CODE_FOR_nothing, then there's no target support, and
143 we can't vectorize the stmt.
145 For additional information on this project see:
146 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
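/* Illustrative sketch, added for exposition and not part of the original
   source: the target-support query described above amounts to roughly

       if (optab_handler (add_optab, V8HImode) == CODE_FOR_nothing)
         return false;   - no target support, the stmt cannot be vectorized

   optab_handler and CODE_FOR_nothing are the interfaces named in the text
   above; V8HImode is only the example mode used there.  */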
149 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
151 /* Function vect_determine_vectorization_factor
153 Determine the vectorization factor (VF). VF is the number of data elements
154 that are operated upon in parallel in a single iteration of the vectorized
 155 loop. For example, when vectorizing a loop that operates on 4-byte elements,
 156 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
157 elements can fit in a single vector register.
159 We currently support vectorization of loops in which all types operated upon
160 are of the same size. Therefore this function currently sets VF according to
161 the size of the types operated upon, and fails if there are multiple sizes
162 in the loop.
164 VF is also the factor by which the loop iterations are strip-mined, e.g.:
165 original loop:
166 for (i=0; i<N; i++){
167 a[i] = b[i] + c[i];
170 vectorized loop:
171 for (i=0; i<N; i+=VF){
172 a[i:VF] = b[i:VF] + c[i:VF];
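/* Worked example, added for exposition and not part of the original source:
   with 16-byte vector registers, a loop whose only data type is a 2-byte
   'short' gets VF = 16 / 2 = 8, i.e. eight elements are processed per vector
   iteration; a loop mixing 2-byte and 4-byte types would trip the
   multiple-sizes restriction described above.  */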
176 static bool
177 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
180 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
181 unsigned nbbs = loop->num_nodes;
182 unsigned int vectorization_factor = 0;
183 tree scalar_type;
184 gphi *phi;
185 tree vectype;
186 unsigned int nunits;
187 stmt_vec_info stmt_info;
188 unsigned i;
189 HOST_WIDE_INT dummy;
190 gimple *stmt, *pattern_stmt = NULL;
191 gimple_seq pattern_def_seq = NULL;
192 gimple_stmt_iterator pattern_def_si = gsi_none ();
193 bool analyze_pattern_stmt = false;
194 bool bool_result;
195 auto_vec<stmt_vec_info> mask_producers;
197 if (dump_enabled_p ())
198 dump_printf_loc (MSG_NOTE, vect_location,
199 "=== vect_determine_vectorization_factor ===\n");
201 for (i = 0; i < nbbs; i++)
203 basic_block bb = bbs[i];
205 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
206 gsi_next (&si))
208 phi = si.phi ();
209 stmt_info = vinfo_for_stmt (phi);
210 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
213 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
214 dump_printf (MSG_NOTE, "\n");
217 gcc_assert (stmt_info);
219 if (STMT_VINFO_RELEVANT_P (stmt_info))
221 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
222 scalar_type = TREE_TYPE (PHI_RESULT (phi));
224 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "get vectype for scalar type: ");
228 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
229 dump_printf (MSG_NOTE, "\n");
232 vectype = get_vectype_for_scalar_type (scalar_type);
233 if (!vectype)
235 if (dump_enabled_p ())
237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
238 "not vectorized: unsupported "
239 "data-type ");
240 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
241 scalar_type);
242 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
244 return false;
246 STMT_VINFO_VECTYPE (stmt_info) = vectype;
248 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
251 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
252 dump_printf (MSG_NOTE, "\n");
255 nunits = TYPE_VECTOR_SUBPARTS (vectype);
256 if (dump_enabled_p ())
257 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
258 nunits);
260 if (!vectorization_factor
261 || (nunits > vectorization_factor))
262 vectorization_factor = nunits;
266 for (gimple_stmt_iterator si = gsi_start_bb (bb);
267 !gsi_end_p (si) || analyze_pattern_stmt;)
269 tree vf_vectype;
271 if (analyze_pattern_stmt)
272 stmt = pattern_stmt;
273 else
274 stmt = gsi_stmt (si);
276 stmt_info = vinfo_for_stmt (stmt);
278 if (dump_enabled_p ())
280 dump_printf_loc (MSG_NOTE, vect_location,
281 "==> examining statement: ");
282 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
283 dump_printf (MSG_NOTE, "\n");
286 gcc_assert (stmt_info);
288 /* Skip stmts which do not need to be vectorized. */
289 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
290 && !STMT_VINFO_LIVE_P (stmt_info))
291 || gimple_clobber_p (stmt))
293 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
294 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
295 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
296 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
298 stmt = pattern_stmt;
299 stmt_info = vinfo_for_stmt (pattern_stmt);
300 if (dump_enabled_p ())
302 dump_printf_loc (MSG_NOTE, vect_location,
303 "==> examining pattern statement: ");
304 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
305 dump_printf (MSG_NOTE, "\n");
308 else
310 if (dump_enabled_p ())
311 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
312 gsi_next (&si);
313 continue;
316 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
317 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
320 analyze_pattern_stmt = true;
322 /* If a pattern statement has def stmts, analyze them too. */
323 if (is_pattern_stmt_p (stmt_info))
325 if (pattern_def_seq == NULL)
327 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
328 pattern_def_si = gsi_start (pattern_def_seq);
330 else if (!gsi_end_p (pattern_def_si))
331 gsi_next (&pattern_def_si);
332 if (pattern_def_seq != NULL)
334 gimple *pattern_def_stmt = NULL;
335 stmt_vec_info pattern_def_stmt_info = NULL;
337 while (!gsi_end_p (pattern_def_si))
339 pattern_def_stmt = gsi_stmt (pattern_def_si);
340 pattern_def_stmt_info
341 = vinfo_for_stmt (pattern_def_stmt);
342 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
343 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
344 break;
345 gsi_next (&pattern_def_si);
348 if (!gsi_end_p (pattern_def_si))
350 if (dump_enabled_p ())
352 dump_printf_loc (MSG_NOTE, vect_location,
353 "==> examining pattern def stmt: ");
354 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
355 pattern_def_stmt, 0);
356 dump_printf (MSG_NOTE, "\n");
359 stmt = pattern_def_stmt;
360 stmt_info = pattern_def_stmt_info;
362 else
364 pattern_def_si = gsi_none ();
365 analyze_pattern_stmt = false;
368 else
369 analyze_pattern_stmt = false;
372 if (gimple_get_lhs (stmt) == NULL_TREE
373 /* MASK_STORE has no lhs, but is ok. */
374 && (!is_gimple_call (stmt)
375 || !gimple_call_internal_p (stmt)
376 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
378 if (is_gimple_call (stmt))
380 /* Ignore calls with no lhs. These must be calls to
381 #pragma omp simd functions, and what vectorization factor
382 it really needs can't be determined until
383 vectorizable_simd_clone_call. */
384 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
386 pattern_def_seq = NULL;
387 gsi_next (&si);
389 continue;
391 if (dump_enabled_p ())
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "not vectorized: irregular stmt.");
395 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
397 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
399 return false;
402 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
404 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
407 "not vectorized: vector stmt in loop:");
408 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
409 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
411 return false;
414 bool_result = false;
416 if (STMT_VINFO_VECTYPE (stmt_info))
 418 /* The only case when a vectype has already been set is for stmts
419 that contain a dataref, or for "pattern-stmts" (stmts
420 generated by the vectorizer to represent/replace a certain
421 idiom). */
422 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
423 || is_pattern_stmt_p (stmt_info)
424 || !gsi_end_p (pattern_def_si));
425 vectype = STMT_VINFO_VECTYPE (stmt_info);
427 else
429 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
430 if (is_gimple_call (stmt)
431 && gimple_call_internal_p (stmt)
432 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
433 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
434 else
435 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
437 /* Bool ops don't participate in vectorization factor
 438 computation. For comparisons, use the compared types to
439 compute a factor. */
440 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
442 mask_producers.safe_push (stmt_info);
443 bool_result = true;
445 if (gimple_code (stmt) == GIMPLE_ASSIGN
446 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
447 == tcc_comparison
448 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
449 != BOOLEAN_TYPE)
450 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
451 else
453 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
455 pattern_def_seq = NULL;
456 gsi_next (&si);
458 continue;
462 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE, vect_location,
465 "get vectype for scalar type: ");
466 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
467 dump_printf (MSG_NOTE, "\n");
469 vectype = get_vectype_for_scalar_type (scalar_type);
470 if (!vectype)
472 if (dump_enabled_p ())
474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
475 "not vectorized: unsupported "
476 "data-type ");
477 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
478 scalar_type);
479 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
481 return false;
484 if (!bool_result)
485 STMT_VINFO_VECTYPE (stmt_info) = vectype;
487 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
490 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
491 dump_printf (MSG_NOTE, "\n");
 495 /* Don't try to compute the VF from scalar types if the stmt
 496 produces a boolean vector. Use the result vectype instead. */
497 if (VECTOR_BOOLEAN_TYPE_P (vectype))
498 vf_vectype = vectype;
499 else
 501 /* The vectorization factor is determined by the smallest
502 scalar type (or the largest vector size, but we only
503 support one vector size per loop). */
504 if (!bool_result)
505 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
506 &dummy);
507 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "get vectype for scalar type: ");
511 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
512 dump_printf (MSG_NOTE, "\n");
514 vf_vectype = get_vectype_for_scalar_type (scalar_type);
516 if (!vf_vectype)
518 if (dump_enabled_p ())
520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
521 "not vectorized: unsupported data-type ");
522 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
523 scalar_type);
524 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
526 return false;
529 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
530 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
532 if (dump_enabled_p ())
534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
535 "not vectorized: different sized vector "
536 "types in statement, ");
537 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
538 vectype);
539 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
540 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
541 vf_vectype);
542 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
544 return false;
547 if (dump_enabled_p ())
549 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
550 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
551 dump_printf (MSG_NOTE, "\n");
554 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
557 if (!vectorization_factor
558 || (nunits > vectorization_factor))
559 vectorization_factor = nunits;
561 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
563 pattern_def_seq = NULL;
564 gsi_next (&si);
569 /* TODO: Analyze cost. Decide if worth while to vectorize. */
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
572 vectorization_factor);
573 if (vectorization_factor <= 1)
575 if (dump_enabled_p ())
576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
577 "not vectorized: unsupported data-type\n");
578 return false;
580 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
582 for (i = 0; i < mask_producers.length (); i++)
584 tree mask_type = NULL;
586 stmt = STMT_VINFO_STMT (mask_producers[i]);
588 if (gimple_code (stmt) == GIMPLE_ASSIGN
589 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
590 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
592 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
593 mask_type = get_mask_type_for_scalar_type (scalar_type);
595 if (!mask_type)
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
599 "not vectorized: unsupported mask\n");
600 return false;
603 else
605 tree rhs;
606 ssa_op_iter iter;
607 gimple *def_stmt;
608 enum vect_def_type dt;
610 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
612 if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
613 &def_stmt, &dt, &vectype))
615 if (dump_enabled_p ())
617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
618 "not vectorized: can't compute mask type "
619 "for statement, ");
620 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
622 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
624 return false;
627 /* No vectype probably means external definition.
628 Allow it in case there is another operand which
 629 allows the mask type to be determined. */
630 if (!vectype)
631 continue;
633 if (!mask_type)
634 mask_type = vectype;
635 else if (TYPE_VECTOR_SUBPARTS (mask_type)
636 != TYPE_VECTOR_SUBPARTS (vectype))
638 if (dump_enabled_p ())
640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
641 "not vectorized: different sized masks "
642 "types in statement, ");
643 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
644 mask_type);
645 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
646 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
647 vectype);
648 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
650 return false;
652 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
653 != VECTOR_BOOLEAN_TYPE_P (vectype))
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
658 "not vectorized: mixed mask and "
659 "nonmask vector types in statement, ");
660 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
661 mask_type);
662 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
663 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
664 vectype);
665 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
667 return false;
 671 /* We may compare a boolean value loaded as a vector of integers.
 672 Fix mask_type in such a case. */
673 if (mask_type
674 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
675 && gimple_code (stmt) == GIMPLE_ASSIGN
676 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
677 mask_type = build_same_sized_truth_vector_type (mask_type);
 680 /* No mask_type should mean a loop-invariant predicate.
681 This is probably a subject for optimization in
682 if-conversion. */
683 if (!mask_type)
685 if (dump_enabled_p ())
687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
688 "not vectorized: can't compute mask type "
689 "for statement, ");
690 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
692 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
694 return false;
697 STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
700 return true;
704 /* Function vect_is_simple_iv_evolution.
 706 FORNOW: A simple evolution of an induction variable in the loop is
707 considered a polynomial evolution. */
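/* Example, added for exposition and not part of the original source: for the
   counter in "for (i = 0; i < n; i++)" the scalar evolution of 'i' is the
   chrec {0, +, 1}_loop, so the evolution part (the step) is the constant 1
   and the initial condition is 0 - a "simple" evolution in the sense above.
   A step that is itself a chrec, e.g. {0, +, {1, +, 1}_loop}_loop, is
   rejected by the tree_is_chrec check below.  */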
709 static bool
710 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
711 tree * step)
713 tree init_expr;
714 tree step_expr;
715 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
716 basic_block bb;
718 /* When there is no evolution in this loop, the evolution function
719 is not "simple". */
720 if (evolution_part == NULL_TREE)
721 return false;
723 /* When the evolution is a polynomial of degree >= 2
724 the evolution function is not "simple". */
725 if (tree_is_chrec (evolution_part))
726 return false;
728 step_expr = evolution_part;
729 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
731 if (dump_enabled_p ())
733 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
734 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
735 dump_printf (MSG_NOTE, ", init: ");
736 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
737 dump_printf (MSG_NOTE, "\n");
740 *init = init_expr;
741 *step = step_expr;
743 if (TREE_CODE (step_expr) != INTEGER_CST
744 && (TREE_CODE (step_expr) != SSA_NAME
745 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
746 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
747 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
748 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
749 || !flag_associative_math)))
750 && (TREE_CODE (step_expr) != REAL_CST
751 || !flag_associative_math))
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "step unknown.\n");
756 return false;
759 return true;
762 /* Function vect_analyze_scalar_cycles_1.
764 Examine the cross iteration def-use cycles of scalar variables
765 in LOOP. LOOP_VINFO represents the loop that is now being
766 considered for vectorization (can be LOOP, or an outer-loop
767 enclosing LOOP). */
769 static void
770 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
772 basic_block bb = loop->header;
773 tree init, step;
774 auto_vec<gimple *, 64> worklist;
775 gphi_iterator gsi;
776 bool double_reduc;
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_NOTE, vect_location,
780 "=== vect_analyze_scalar_cycles ===\n");
782 /* First - identify all inductions. Reduction detection assumes that all the
783 inductions have been identified, therefore, this order must not be
784 changed. */
785 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
787 gphi *phi = gsi.phi ();
788 tree access_fn = NULL;
789 tree def = PHI_RESULT (phi);
790 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
792 if (dump_enabled_p ())
794 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
795 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
796 dump_printf (MSG_NOTE, "\n");
799 /* Skip virtual phi's. The data dependences that are associated with
800 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
801 if (virtual_operand_p (def))
802 continue;
804 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
806 /* Analyze the evolution function. */
807 access_fn = analyze_scalar_evolution (loop, def);
808 if (access_fn)
810 STRIP_NOPS (access_fn);
811 if (dump_enabled_p ())
813 dump_printf_loc (MSG_NOTE, vect_location,
814 "Access function of PHI: ");
815 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
816 dump_printf (MSG_NOTE, "\n");
818 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
819 = evolution_part_in_loop_num (access_fn, loop->num);
822 if (!access_fn
823 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
824 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
825 && TREE_CODE (step) != INTEGER_CST))
827 worklist.safe_push (phi);
828 continue;
831 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
835 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
839 /* Second - identify all reductions and nested cycles. */
840 while (worklist.length () > 0)
842 gimple *phi = worklist.pop ();
843 tree def = PHI_RESULT (phi);
844 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
845 gimple *reduc_stmt;
846 bool nested_cycle;
848 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
851 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
852 dump_printf (MSG_NOTE, "\n");
855 gcc_assert (!virtual_operand_p (def)
856 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
858 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
859 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
860 &double_reduc, false);
861 if (reduc_stmt)
863 if (double_reduc)
865 if (dump_enabled_p ())
866 dump_printf_loc (MSG_NOTE, vect_location,
867 "Detected double reduction.\n");
869 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
870 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
871 vect_double_reduction_def;
873 else
875 if (nested_cycle)
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "Detected vectorizable nested cycle.\n");
881 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
882 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
883 vect_nested_cycle;
885 else
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "Detected reduction.\n");
891 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
892 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
893 vect_reduction_def;
894 /* Store the reduction cycles for possible vectorization in
895 loop-aware SLP. */
896 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
900 else
901 if (dump_enabled_p ())
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "Unknown def-use cycle pattern.\n");
908 /* Function vect_analyze_scalar_cycles.
910 Examine the cross iteration def-use cycles of scalar variables, by
911 analyzing the loop-header PHIs of scalar variables. Classify each
912 cycle as one of the following: invariant, induction, reduction, unknown.
 913 We do that for the loop represented by LOOP_VINFO, and also for its
 914 inner-loop, if it exists.
915 Examples for scalar cycles:
917 Example1: reduction:
919 loop1:
920 for (i=0; i<N; i++)
921 sum += a[i];
923 Example2: induction:
925 loop2:
926 for (i=0; i<N; i++)
927 a[i] = i; */
929 static void
930 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
932 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
934 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
936 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 937 Reductions in such an inner-loop therefore have different properties than
938 the reductions in the nest that gets vectorized:
939 1. When vectorized, they are executed in the same order as in the original
940 scalar loop, so we can't change the order of computation when
941 vectorizing them.
942 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
943 current checks are too strict. */
945 if (loop->inner)
946 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
949 /* Transfer group and reduction information from STMT to its pattern stmt. */
951 static void
952 vect_fixup_reduc_chain (gimple *stmt)
954 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
955 gimple *stmtp;
956 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
957 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
958 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
961 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
962 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
963 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
964 if (stmt)
965 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
966 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
968 while (stmt);
969 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
972 /* Fixup scalar cycles that now have their stmts detected as patterns. */
974 static void
975 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
977 gimple *first;
978 unsigned i;
980 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
981 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
983 vect_fixup_reduc_chain (first);
984 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
985 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
989 /* Function vect_get_loop_niters.
 991 Determine the number of iterations of the loop and place it
992 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
993 in NUMBER_OF_ITERATIONSM1.
995 Return the loop exit condition. */
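/* Example, added for exposition and not part of the original source: for
   "for (i = 0; i < N; i++)" with N > 0 the latch runs N - 1 times, so
   NUMBER_OF_ITERATIONSM1 is N - 1 and NUMBER_OF_ITERATIONS, the number of
   header executions computed below as latch executions plus one, is N.  */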
998 static gcond *
999 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
1000 tree *number_of_iterationsm1)
1002 tree niters;
1004 if (dump_enabled_p ())
1005 dump_printf_loc (MSG_NOTE, vect_location,
1006 "=== get_loop_niters ===\n");
1008 niters = number_of_latch_executions (loop);
1009 *number_of_iterationsm1 = niters;
1011 /* We want the number of loop header executions which is the number
1012 of latch executions plus one.
1013 ??? For UINT_MAX latch executions this number overflows to zero
1014 for loops like do { n++; } while (n != 0); */
1015 if (niters && !chrec_contains_undetermined (niters))
1016 niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
1017 build_int_cst (TREE_TYPE (niters), 1));
1018 *number_of_iterations = niters;
1020 return get_loop_exit_condition (loop);
1024 /* Function bb_in_loop_p
1026 Used as predicate for dfs order traversal of the loop bbs. */
1028 static bool
1029 bb_in_loop_p (const_basic_block bb, const void *data)
1031 const struct loop *const loop = (const struct loop *)data;
1032 if (flow_bb_inside_loop_p (loop, bb))
1033 return true;
1034 return false;
1038 /* Function new_loop_vec_info.
1040 Create and initialize a new loop_vec_info struct for LOOP, as well as
1041 stmt_vec_info structs for all the stmts in LOOP. */
1043 static loop_vec_info
1044 new_loop_vec_info (struct loop *loop)
1046 loop_vec_info res;
1047 basic_block *bbs;
1048 gimple_stmt_iterator si;
1049 unsigned int i, nbbs;
1051 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1052 res->kind = vec_info::loop;
1053 LOOP_VINFO_LOOP (res) = loop;
1055 bbs = get_loop_body (loop);
1057 /* Create/Update stmt_info for all stmts in the loop. */
1058 for (i = 0; i < loop->num_nodes; i++)
1060 basic_block bb = bbs[i];
1062 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1064 gimple *phi = gsi_stmt (si);
1065 gimple_set_uid (phi, 0);
1066 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1069 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1071 gimple *stmt = gsi_stmt (si);
1072 gimple_set_uid (stmt, 0);
1073 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1077 /* CHECKME: We want to visit all BBs before their successors (except for
1078 latch blocks, for which this assertion wouldn't hold). In the simple
 1079 case of the loop forms we allow, a dfs order of the BBs would be the same
1080 as reversed postorder traversal, so we are safe. */
1082 free (bbs);
1083 bbs = XCNEWVEC (basic_block, loop->num_nodes);
1084 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1085 bbs, loop->num_nodes, loop);
1086 gcc_assert (nbbs == loop->num_nodes);
1088 LOOP_VINFO_BBS (res) = bbs;
1089 LOOP_VINFO_NITERSM1 (res) = NULL;
1090 LOOP_VINFO_NITERS (res) = NULL;
1091 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1092 LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1093 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1094 LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1095 LOOP_VINFO_VECT_FACTOR (res) = 0;
1096 LOOP_VINFO_LOOP_NEST (res) = vNULL;
1097 LOOP_VINFO_DATAREFS (res) = vNULL;
1098 LOOP_VINFO_DDRS (res) = vNULL;
1099 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1100 LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1101 LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1102 LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1103 LOOP_VINFO_REDUCTIONS (res) = vNULL;
1104 LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1105 LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1106 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1107 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1108 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1109 LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1110 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1112 return res;
1116 /* Function destroy_loop_vec_info.
1118 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1119 stmts in the loop. */
1121 void
1122 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1124 struct loop *loop;
1125 basic_block *bbs;
1126 int nbbs;
1127 gimple_stmt_iterator si;
1128 int j;
1129 vec<slp_instance> slp_instances;
1130 slp_instance instance;
1131 bool swapped;
1133 if (!loop_vinfo)
1134 return;
1136 loop = LOOP_VINFO_LOOP (loop_vinfo);
1138 bbs = LOOP_VINFO_BBS (loop_vinfo);
1139 nbbs = clean_stmts ? loop->num_nodes : 0;
1140 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1142 for (j = 0; j < nbbs; j++)
1144 basic_block bb = bbs[j];
1145 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1146 free_stmt_vec_info (gsi_stmt (si));
1148 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1150 gimple *stmt = gsi_stmt (si);
1152 /* We may have broken canonical form by moving a constant
1153 into RHS1 of a commutative op. Fix such occurrences. */
1154 if (swapped && is_gimple_assign (stmt))
1156 enum tree_code code = gimple_assign_rhs_code (stmt);
1158 if ((code == PLUS_EXPR
1159 || code == POINTER_PLUS_EXPR
1160 || code == MULT_EXPR)
1161 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1162 swap_ssa_operands (stmt,
1163 gimple_assign_rhs1_ptr (stmt),
1164 gimple_assign_rhs2_ptr (stmt));
1167 /* Free stmt_vec_info. */
1168 free_stmt_vec_info (stmt);
1169 gsi_next (&si);
1173 free (LOOP_VINFO_BBS (loop_vinfo));
1174 vect_destroy_datarefs (loop_vinfo);
1175 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1176 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1177 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1178 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1179 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1180 FOR_EACH_VEC_ELT (slp_instances, j, instance)
1181 vect_free_slp_instance (instance);
1183 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1184 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1185 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1186 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1188 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1189 loop_vinfo->scalar_cost_vec.release ();
1191 free (loop_vinfo);
1192 loop->aux = NULL;
1196 /* Calculate the cost of one scalar iteration of the loop. */
1197 static void
1198 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1200 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1201 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1202 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1203 int innerloop_iters, i;
1205 /* Count statements in scalar loop. Using this as scalar cost for a single
1206 iteration for now.
1208 TODO: Add outer loop support.
1210 TODO: Consider assigning different costs to different scalar
1211 statements. */
1213 /* FORNOW. */
1214 innerloop_iters = 1;
1215 if (loop->inner)
1216 innerloop_iters = 50; /* FIXME */
1218 for (i = 0; i < nbbs; i++)
1220 gimple_stmt_iterator si;
1221 basic_block bb = bbs[i];
1223 if (bb->loop_father == loop->inner)
1224 factor = innerloop_iters;
1225 else
1226 factor = 1;
1228 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1230 gimple *stmt = gsi_stmt (si);
1231 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1233 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1234 continue;
1236 /* Skip stmts that are not vectorized inside the loop. */
1237 if (stmt_info
1238 && !STMT_VINFO_RELEVANT_P (stmt_info)
1239 && (!STMT_VINFO_LIVE_P (stmt_info)
1240 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1241 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1242 continue;
1244 vect_cost_for_stmt kind;
1245 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1247 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1248 kind = scalar_load;
1249 else
1250 kind = scalar_store;
1252 else
1253 kind = scalar_stmt;
1255 scalar_single_iter_cost
1256 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1257 factor, kind, NULL, 0, vect_prologue);
1260 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1261 = scalar_single_iter_cost;
1265 /* Function vect_analyze_loop_form_1.
1267 Verify that certain CFG restrictions hold, including:
1268 - the loop has a pre-header
1269 - the loop has a single entry and exit
1270 - the loop exit condition is simple enough, and the number of iterations
1271 can be analyzed (a countable loop). */
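/* Illustration, added for exposition and not part of the original source:
   an inner-most loop such as

       for (i = 0; i < n; i++)
         if (a[i] > 0)
           b[i] = a[i];

   has more than the two basic blocks (header and latch) required below
   unless if-conversion has flattened the branch first, and a loop containing
   a "break" has more than one exit and is rejected as well.  */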
1273 bool
1274 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1275 tree *number_of_iterationsm1,
1276 tree *number_of_iterations, gcond **inner_loop_cond)
1278 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE, vect_location,
1280 "=== vect_analyze_loop_form ===\n");
1282 /* Different restrictions apply when we are considering an inner-most loop,
1283 vs. an outer (nested) loop.
1284 (FORNOW. May want to relax some of these restrictions in the future). */
1286 if (!loop->inner)
1288 /* Inner-most loop. We currently require that the number of BBs is
1289 exactly 2 (the header and latch). Vectorizable inner-most loops
1290 look like this:
1292 (pre-header)
1294 header <--------+
1295 | | |
1296 | +--> latch --+
1298 (exit-bb) */
1300 if (loop->num_nodes != 2)
1302 if (dump_enabled_p ())
1303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1304 "not vectorized: control flow in loop.\n");
1305 return false;
1308 if (empty_block_p (loop->header))
1310 if (dump_enabled_p ())
1311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1312 "not vectorized: empty loop.\n");
1313 return false;
1316 else
1318 struct loop *innerloop = loop->inner;
1319 edge entryedge;
1321 /* Nested loop. We currently require that the loop is doubly-nested,
1322 contains a single inner loop, and the number of BBs is exactly 5.
1323 Vectorizable outer-loops look like this:
1325 (pre-header)
1327 header <---+
1329 inner-loop |
1331 tail ------+
1333 (exit-bb)
1335 The inner-loop has the properties expected of inner-most loops
1336 as described above. */
1338 if ((loop->inner)->inner || (loop->inner)->next)
1340 if (dump_enabled_p ())
1341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1342 "not vectorized: multiple nested loops.\n");
1343 return false;
1346 if (loop->num_nodes != 5)
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1350 "not vectorized: control flow in loop.\n");
1351 return false;
1354 entryedge = loop_preheader_edge (innerloop);
1355 if (entryedge->src != loop->header
1356 || !single_exit (innerloop)
1357 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1359 if (dump_enabled_p ())
1360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1361 "not vectorized: unsupported outerloop form.\n");
1362 return false;
1365 /* Analyze the inner-loop. */
1366 tree inner_niterm1, inner_niter;
1367 if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1368 &inner_niterm1, &inner_niter, NULL))
1370 if (dump_enabled_p ())
1371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1372 "not vectorized: Bad inner loop.\n");
1373 return false;
1376 if (!expr_invariant_in_loop_p (loop, inner_niter))
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1380 "not vectorized: inner-loop count not"
1381 " invariant.\n");
1382 return false;
1385 if (dump_enabled_p ())
1386 dump_printf_loc (MSG_NOTE, vect_location,
1387 "Considering outer-loop vectorization.\n");
1390 if (!single_exit (loop)
1391 || EDGE_COUNT (loop->header->preds) != 2)
1393 if (dump_enabled_p ())
1395 if (!single_exit (loop))
1396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1397 "not vectorized: multiple exits.\n");
1398 else if (EDGE_COUNT (loop->header->preds) != 2)
1399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1400 "not vectorized: too many incoming edges.\n");
1402 return false;
 1405 /* We assume that the loop exit condition is at the end of the loop, i.e.,
1406 that the loop is represented as a do-while (with a proper if-guard
1407 before the loop if needed), where the loop header contains all the
1408 executable statements, and the latch is empty. */
1409 if (!empty_block_p (loop->latch)
1410 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1412 if (dump_enabled_p ())
1413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1414 "not vectorized: latch block not empty.\n");
1415 return false;
1418 /* Make sure there exists a single-predecessor exit bb: */
1419 if (!single_pred_p (single_exit (loop)->dest))
1421 edge e = single_exit (loop);
1422 if (!(e->flags & EDGE_ABNORMAL))
1424 split_loop_exit_edge (e);
1425 if (dump_enabled_p ())
1426 dump_printf (MSG_NOTE, "split exit edge.\n");
1428 else
1430 if (dump_enabled_p ())
1431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1432 "not vectorized: abnormal loop exit edge.\n");
1433 return false;
1437 *loop_cond = vect_get_loop_niters (loop, number_of_iterations,
1438 number_of_iterationsm1);
1439 if (!*loop_cond)
1441 if (dump_enabled_p ())
1442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1443 "not vectorized: complicated exit condition.\n");
1444 return false;
1447 if (!*number_of_iterations
1448 || chrec_contains_undetermined (*number_of_iterations))
1450 if (dump_enabled_p ())
1451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1452 "not vectorized: number of iterations cannot be "
1453 "computed.\n");
1454 return false;
1457 if (integer_zerop (*number_of_iterations))
1459 if (dump_enabled_p ())
1460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1461 "not vectorized: number of iterations = 0.\n");
1462 return false;
1465 return true;
1468 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
1470 loop_vec_info
1471 vect_analyze_loop_form (struct loop *loop)
1473 tree number_of_iterations, number_of_iterationsm1;
1474 gcond *loop_cond, *inner_loop_cond = NULL;
1476 if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
1477 &number_of_iterations, &inner_loop_cond))
1478 return NULL;
1480 loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1481 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1482 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1483 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1485 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1487 if (dump_enabled_p ())
1489 dump_printf_loc (MSG_NOTE, vect_location,
1490 "Symbolic number of iterations is ");
1491 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1492 dump_printf (MSG_NOTE, "\n");
1496 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1497 if (inner_loop_cond)
1498 STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1499 = loop_exit_ctrl_vec_info_type;
1501 gcc_assert (!loop->aux);
1502 loop->aux = loop_vinfo;
1503 return loop_vinfo;
 1508 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
 1509 statements, update the vectorization factor. */
1511 static void
1512 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1515 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1516 int nbbs = loop->num_nodes;
1517 unsigned int vectorization_factor;
1518 int i;
1520 if (dump_enabled_p ())
1521 dump_printf_loc (MSG_NOTE, vect_location,
1522 "=== vect_update_vf_for_slp ===\n");
1524 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1525 gcc_assert (vectorization_factor != 0);
 1527 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
 1528 the vectorization factor of the loop is the unrolling factor required by
 1529 the SLP instances. If that unrolling factor is 1, we say that we
 1530 perform pure SLP on the loop - cross-iteration parallelism is not
 1531 exploited. */
1532 bool only_slp_in_loop = true;
1533 for (i = 0; i < nbbs; i++)
1535 basic_block bb = bbs[i];
1536 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1537 gsi_next (&si))
1539 gimple *stmt = gsi_stmt (si);
1540 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1541 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1542 && STMT_VINFO_RELATED_STMT (stmt_info))
1544 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1545 stmt_info = vinfo_for_stmt (stmt);
1547 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1548 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1549 && !PURE_SLP_STMT (stmt_info))
1550 /* STMT needs both SLP and loop-based vectorization. */
1551 only_slp_in_loop = false;
1555 if (only_slp_in_loop)
1556 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1557 else
1558 vectorization_factor
1559 = least_common_multiple (vectorization_factor,
1560 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
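  /* Worked example, added for exposition and not part of the original
     source: a loop VF of 4 combined with an SLP unrolling factor of 2 stays
     at lcm (4, 2) = 4, whereas an unrolling factor of 3 would raise the VF
     to lcm (4, 3) = 12.  */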
1562 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1563 if (dump_enabled_p ())
1564 dump_printf_loc (MSG_NOTE, vect_location,
1565 "Updating vectorization factor to %d\n",
1566 vectorization_factor);
1569 /* Function vect_analyze_loop_operations.
1571 Scan the loop stmts and make sure they are all vectorizable. */
1573 static bool
1574 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1576 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1577 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1578 int nbbs = loop->num_nodes;
1579 int i;
1580 stmt_vec_info stmt_info;
1581 bool need_to_vectorize = false;
1582 bool ok;
1584 if (dump_enabled_p ())
1585 dump_printf_loc (MSG_NOTE, vect_location,
1586 "=== vect_analyze_loop_operations ===\n");
1588 for (i = 0; i < nbbs; i++)
1590 basic_block bb = bbs[i];
1592 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1593 gsi_next (&si))
1595 gphi *phi = si.phi ();
1596 ok = true;
1598 stmt_info = vinfo_for_stmt (phi);
1599 if (dump_enabled_p ())
1601 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1602 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1603 dump_printf (MSG_NOTE, "\n");
1605 if (virtual_operand_p (gimple_phi_result (phi)))
1606 continue;
1608 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1609 (i.e., a phi in the tail of the outer-loop). */
1610 if (! is_loop_header_bb_p (bb))
 1612 /* FORNOW: we currently don't support the case where these phis
 1613 are not used in the outer-loop (unless it is a double reduction,
 1614 i.e., this phi is vect_reduction_def), because this case
 1615 requires us to actually do something here. */
1616 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1617 || STMT_VINFO_LIVE_P (stmt_info))
1618 && STMT_VINFO_DEF_TYPE (stmt_info)
1619 != vect_double_reduction_def)
1621 if (dump_enabled_p ())
1622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1623 "Unsupported loop-closed phi in "
1624 "outer-loop.\n");
1625 return false;
1628 /* If PHI is used in the outer loop, we check that its operand
1629 is defined in the inner loop. */
1630 if (STMT_VINFO_RELEVANT_P (stmt_info))
1632 tree phi_op;
1633 gimple *op_def_stmt;
1635 if (gimple_phi_num_args (phi) != 1)
1636 return false;
1638 phi_op = PHI_ARG_DEF (phi, 0);
1639 if (TREE_CODE (phi_op) != SSA_NAME)
1640 return false;
1642 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1643 if (gimple_nop_p (op_def_stmt)
1644 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1645 || !vinfo_for_stmt (op_def_stmt))
1646 return false;
1648 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1649 != vect_used_in_outer
1650 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1651 != vect_used_in_outer_by_reduction)
1652 return false;
1655 continue;
1658 gcc_assert (stmt_info);
1660 if (STMT_VINFO_LIVE_P (stmt_info))
1662 /* FORNOW: not yet supported. */
1663 if (dump_enabled_p ())
1664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1665 "not vectorized: value used after loop.\n");
1666 return false;
1669 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1670 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1672 /* A scalar-dependence cycle that we don't support. */
1673 if (dump_enabled_p ())
1674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1675 "not vectorized: scalar dependence cycle.\n");
1676 return false;
1679 if (STMT_VINFO_RELEVANT_P (stmt_info))
1681 need_to_vectorize = true;
1682 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1683 ok = vectorizable_induction (phi, NULL, NULL);
1686 if (!ok)
1688 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1691 "not vectorized: relevant phi not "
1692 "supported: ");
1693 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1694 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1696 return false;
1700 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1701 gsi_next (&si))
1703 gimple *stmt = gsi_stmt (si);
1704 if (!gimple_clobber_p (stmt)
1705 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1706 return false;
1708 } /* bbs */
1710 /* All operations in the loop are either irrelevant (deal with loop
1711 control, or dead), or only used outside the loop and can be moved
1712 out of the loop (e.g. invariants, inductions). The loop can be
1713 optimized away by scalar optimizations. We're better off not
1714 touching this loop. */
1715 if (!need_to_vectorize)
1717 if (dump_enabled_p ())
1718 dump_printf_loc (MSG_NOTE, vect_location,
1719 "All the computation can be taken out of the loop.\n");
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1722 "not vectorized: redundant loop. no profit to "
1723 "vectorize.\n");
1724 return false;
1727 return true;
1731 /* Function vect_analyze_loop_2.
1733 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1734 for it. The different analyses will record information in the
1735 loop_vec_info struct. */
1736 static bool
1737 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1739 bool ok;
1740 int max_vf = MAX_VECTORIZATION_FACTOR;
1741 int min_vf = 2;
1742 unsigned int n_stmts = 0;
1744 /* The first group of checks is independent of the vector size. */
1745 fatal = true;
1747 /* Find all data references in the loop (which correspond to vdefs/vuses)
1748 and analyze their evolution in the loop. */
1750 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1752 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1753 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1755 if (dump_enabled_p ())
1756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1757 "not vectorized: loop contains function calls"
1758 " or data references that cannot be analyzed\n");
1759 return false;
1762 for (unsigned i = 0; i < loop->num_nodes; i++)
1763 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1764 !gsi_end_p (gsi); gsi_next (&gsi))
1766 gimple *stmt = gsi_stmt (gsi);
1767 if (is_gimple_debug (stmt))
1768 continue;
1769 ++n_stmts;
1770 if (!find_data_references_in_stmt (loop, stmt,
1771 &LOOP_VINFO_DATAREFS (loop_vinfo)))
1773 if (is_gimple_call (stmt) && loop->safelen)
1775 tree fndecl = gimple_call_fndecl (stmt), op;
1776 if (fndecl != NULL_TREE)
1778 cgraph_node *node = cgraph_node::get (fndecl);
1779 if (node != NULL && node->simd_clones != NULL)
1781 unsigned int j, n = gimple_call_num_args (stmt);
1782 for (j = 0; j < n; j++)
1784 op = gimple_call_arg (stmt, j);
1785 if (DECL_P (op)
1786 || (REFERENCE_CLASS_P (op)
1787 && get_base_address (op)))
1788 break;
1790 op = gimple_call_lhs (stmt);
1791 /* Ignore #pragma omp declare simd functions
1792 if they don't have data references in the
1793 call stmt itself. */
1794 if (j == n
1795 && !(op
1796 && (DECL_P (op)
1797 || (REFERENCE_CLASS_P (op)
1798 && get_base_address (op)))))
1799 continue;
1803 if (dump_enabled_p ())
1804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1805 "not vectorized: loop contains function "
1806 "calls or data references that cannot "
1807 "be analyzed\n");
1808 return false;
1812 /* Analyze the data references and also adjust the minimal
1813 vectorization factor according to the loads and stores. */
1815 ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1816 if (!ok)
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "bad data references.\n");
1821 return false;
1824 /* Classify all cross-iteration scalar data-flow cycles.
1825 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1826 vect_analyze_scalar_cycles (loop_vinfo);
1828 vect_pattern_recog (loop_vinfo);
1830 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1832 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1833 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1835 ok = vect_analyze_data_ref_accesses (loop_vinfo);
1836 if (!ok)
1838 if (dump_enabled_p ())
1839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1840 "bad data access.\n");
1841 return false;
1844 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1846 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1847 if (!ok)
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1851 "unexpected pattern.\n");
1852 return false;
 1855 /* The rest of the analysis below depends on the vector size in some
 1856 way, so from here on failures are no longer fatal. */
1856 fatal = false;
1858 /* Analyze data dependences between the data-refs in the loop
1859 and adjust the maximum vectorization factor according to
1860 the dependences.
1861 FORNOW: fail at the first data dependence that we encounter. */
1863 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1864 if (!ok
1865 || max_vf < min_vf)
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1869 "bad data dependence.\n");
1870 return false;
1873 ok = vect_determine_vectorization_factor (loop_vinfo);
1874 if (!ok)
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't determine vectorization factor.\n");
1879 return false;
1881 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1883 if (dump_enabled_p ())
1884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1885 "bad data dependence.\n");
1886 return false;
1889 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1890 ok = vect_analyze_slp (loop_vinfo, n_stmts);
1891 if (!ok)
1892 return false;
1894 /* If there are any SLP instances mark them as pure_slp. */
1895 bool slp = vect_make_slp_decision (loop_vinfo);
1896 if (slp)
1898 /* Find stmts that need to be both vectorized and SLPed. */
1899 vect_detect_hybrid_slp (loop_vinfo);
1901 /* Update the vectorization factor based on the SLP decision. */
1902 vect_update_vf_for_slp (loop_vinfo);
1905 /* Now the vectorization factor is final. */
1906 unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1907 gcc_assert (vectorization_factor != 0);
1909 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1910 dump_printf_loc (MSG_NOTE, vect_location,
1911 "vectorization_factor = %d, niters = "
1912 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1913 LOOP_VINFO_INT_NITERS (loop_vinfo));
1915 HOST_WIDE_INT max_niter
1916 = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1917 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1918 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1919 || (max_niter != -1
1920 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1922 if (dump_enabled_p ())
1923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924 "not vectorized: iteration count too small.\n");
1925 if (dump_enabled_p ())
1926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1927 "not vectorized: iteration count smaller than "
1928 "vectorization factor.\n");
1929 return false;
1932 /* Analyze the alignment of the data-refs in the loop.
1933 Fail if a data reference is found that cannot be vectorized. */
1935 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1936 if (!ok)
1938 if (dump_enabled_p ())
1939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1940 "bad data alignment.\n");
1941 return false;
1944 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1945 It is important to call pruning after vect_analyze_data_ref_accesses,
1946 since we use grouping information gathered by interleaving analysis. */
1947 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1948 if (!ok)
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "number of versioning for alias "
1953 "run-time tests exceeds %d "
1954 "(--param vect-max-version-for-alias-checks)\n",
1955 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1956 return false;
1959 /* Compute the scalar iteration cost. */
1960 vect_compute_single_scalar_iteration_cost (loop_vinfo);
1962 /* This pass will decide on using loop versioning and/or loop peeling in
1963 order to enhance the alignment of data references in the loop. */
1965 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1966 if (!ok)
1968 if (dump_enabled_p ())
1969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1970 "bad data alignment.\n");
1971 return false;
1974 if (slp)
1976 /* Analyze operations in the SLP instances. Note this may
1977 remove unsupported SLP instances which makes the above
1978 SLP kind detection invalid. */
1979 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1980 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1981 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1982 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1983 return false;
1986 /* Scan all the remaining operations in the loop that are not subject
1987 to SLP and make sure they are vectorizable. */
1988 ok = vect_analyze_loop_operations (loop_vinfo);
1989 if (!ok)
1991 if (dump_enabled_p ())
1992 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1993 "bad operation or unsupported loop bound.\n");
1994 return false;
1997 /* Analyze cost. Decide if worth while to vectorize. */
1998 int min_profitable_estimate, min_profitable_iters;
1999 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2000 &min_profitable_estimate);
2002 if (min_profitable_iters < 0)
2004 if (dump_enabled_p ())
2005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2006 "not vectorized: vectorization not profitable.\n");
2007 if (dump_enabled_p ())
2008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2009 "not vectorized: vector version will never be "
2010 "profitable.\n");
2011 return false;
2014 int min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2015 * vectorization_factor) - 1);
2017 /* Use the cost model only if it is more conservative than the user-specified
2018 threshold. */
2019 unsigned th = (unsigned) min_scalar_loop_bound;
2020 if (min_profitable_iters
2021 && (!min_scalar_loop_bound
2022 || min_profitable_iters > min_scalar_loop_bound))
2023 th = (unsigned) min_profitable_iters;
2025 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2027 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2028 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2030 if (dump_enabled_p ())
2031 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2032 "not vectorized: vectorization not profitable.\n");
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "not vectorized: iteration count smaller than user "
2036 "specified loop bound parameter or minimum profitable "
2037 "iterations (whichever is more conservative).\n");
2038 return false;
2041 HOST_WIDE_INT estimated_niter
2042 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2043 if (estimated_niter != -1
2044 && ((unsigned HOST_WIDE_INT) estimated_niter
2045 <= MAX (th, (unsigned)min_profitable_estimate)))
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049 "not vectorized: estimated iteration count too "
2050 "small.\n");
2051 if (dump_enabled_p ())
2052 dump_printf_loc (MSG_NOTE, vect_location,
2053 "not vectorized: estimated iteration count smaller "
2054 "than specified loop bound parameter or minimum "
2055 "profitable iterations (whichever is more "
2056 "conservative).\n");
2057 return false;
2060 /* Decide whether we need to create an epilogue loop to handle
2061 remaining scalar iterations. */
2062 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2063 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2064 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
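/* For illustration (hypothetical numbers): with a cost model threshold of 5
   and a vectorization factor of 4, the expression above yields
   th = ((5 + 1) / 4) * 4 = 4, i.e. the largest multiple of the vectorization
   factor that does not exceed threshold + 1.  */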
2066 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2067 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2069 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2070 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2071 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2072 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2074 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2075 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2076 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2077 /* In case of versioning, check if the maximum number of
2078 iterations is greater than th. If they are identical,
2079 the epilogue is unnecessary. */
2080 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
2081 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2082 || (unsigned HOST_WIDE_INT) max_niter > th)))
2083 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
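/* For example (hypothetical counts): with NITERS = 100 and a vectorization
   factor of 8, tree_ctz (100) == 2 is smaller than exact_log2 (8) == 3, so
   100 is not a multiple of 8 and an epilogue loop is needed for the
   remaining 100 % 8 == 4 scalar iterations.  */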
2085 /* If an epilogue loop is required make sure we can create one. */
2086 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2087 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2091 if (!vect_can_advance_ivs_p (loop_vinfo)
2092 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2093 single_exit (LOOP_VINFO_LOOP
2094 (loop_vinfo))))
2096 if (dump_enabled_p ())
2097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2098 "not vectorized: can't create required "
2099 "epilog loop\n");
2100 return false;
2104 gcc_assert (vectorization_factor
2105 == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2107 return true;
2110 /* Function vect_analyze_loop.
2112 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2113 for it. The different analyses will record information in the
2114 loop_vec_info struct. */
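/* Illustrative retry behaviour (hypothetical target): if the target's
   autovectorize_vector_sizes hook reports 32 | 16 and the analysis fails at
   the autodetected 32-byte size, the loop below clears that bit and re-runs
   the analysis with current_vector_size == 16; once no candidate sizes
   remain, or the failure was fatal, NULL is returned.  */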
2115 loop_vec_info
2116 vect_analyze_loop (struct loop *loop)
2118 loop_vec_info loop_vinfo;
2119 unsigned int vector_sizes;
2121 /* Autodetect first vector size we try. */
2122 current_vector_size = 0;
2123 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2125 if (dump_enabled_p ())
2126 dump_printf_loc (MSG_NOTE, vect_location,
2127 "===== analyze_loop_nest =====\n");
2129 if (loop_outer (loop)
2130 && loop_vec_info_for_loop (loop_outer (loop))
2131 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2133 if (dump_enabled_p ())
2134 dump_printf_loc (MSG_NOTE, vect_location,
2135 "outer-loop already vectorized.\n");
2136 return NULL;
2139 while (1)
2141 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2142 loop_vinfo = vect_analyze_loop_form (loop);
2143 if (!loop_vinfo)
2145 if (dump_enabled_p ())
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147 "bad loop form.\n");
2148 return NULL;
2151 bool fatal = false;
2152 if (vect_analyze_loop_2 (loop_vinfo, fatal))
2154 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2156 return loop_vinfo;
2159 destroy_loop_vec_info (loop_vinfo, true);
2161 vector_sizes &= ~current_vector_size;
2162 if (fatal
2163 || vector_sizes == 0
2164 || current_vector_size == 0)
2165 return NULL;
2167 /* Try the next biggest vector size. */
2168 current_vector_size = 1 << floor_log2 (vector_sizes);
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_NOTE, vect_location,
2171 "***** Re-trying analysis with "
2172 "vector size %d\n", current_vector_size);
2177 /* Function reduction_code_for_scalar_code
2179 Input:
2180 CODE - tree_code of a reduction operations.
2182 Output:
2183 REDUC_CODE - the corresponding tree-code to be used to reduce the
2184 vector of partial results into a single scalar result, or ERROR_MARK
2185 if the operation is a supported reduction operation, but does not have
2186 such a tree-code.
2188 Return FALSE if CODE currently cannot be vectorized as reduction. */
2190 static bool
2191 reduction_code_for_scalar_code (enum tree_code code,
2192 enum tree_code *reduc_code)
2194 switch (code)
2196 case MAX_EXPR:
2197 *reduc_code = REDUC_MAX_EXPR;
2198 return true;
2200 case MIN_EXPR:
2201 *reduc_code = REDUC_MIN_EXPR;
2202 return true;
2204 case PLUS_EXPR:
2205 *reduc_code = REDUC_PLUS_EXPR;
2206 return true;
2208 case MULT_EXPR:
2209 case MINUS_EXPR:
2210 case BIT_IOR_EXPR:
2211 case BIT_XOR_EXPR:
2212 case BIT_AND_EXPR:
2213 *reduc_code = ERROR_MARK;
2214 return true;
2216 default:
2217 return false;
2222 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2223 STMT is printed with a message MSG. */
2225 static void
2226 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2228 dump_printf_loc (msg_type, vect_location, "%s", msg);
2229 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2230 dump_printf (msg_type, "\n");
2234 /* Detect SLP reduction of the form:
2236 #a1 = phi <a5, a0>
2237 a2 = operation (a1)
2238 a3 = operation (a2)
2239 a4 = operation (a3)
2240 a5 = operation (a4)
2242 #a = phi <a5>
2244 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2245 FIRST_STMT is the first reduction stmt in the chain
2246 (a2 = operation (a1)).
2248 Return TRUE if a reduction chain was detected. */
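/* For example (illustrative only), a manually unrolled accumulation

     for (i = 0; i < n; i += 2)
       {
         sum += a[i];
         sum += a[i + 1];
       }

   produces two in-loop PLUS_EXPR statements feeding a single reduction phi,
   i.e. a chain of size 2 in the notation above.  */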
2250 static bool
2251 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2252 gimple *first_stmt)
2254 struct loop *loop = (gimple_bb (phi))->loop_father;
2255 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2256 enum tree_code code;
2257 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2258 stmt_vec_info use_stmt_info, current_stmt_info;
2259 tree lhs;
2260 imm_use_iterator imm_iter;
2261 use_operand_p use_p;
2262 int nloop_uses, size = 0, n_out_of_loop_uses;
2263 bool found = false;
2265 if (loop != vect_loop)
2266 return false;
2268 lhs = PHI_RESULT (phi);
2269 code = gimple_assign_rhs_code (first_stmt);
2270 while (1)
2272 nloop_uses = 0;
2273 n_out_of_loop_uses = 0;
2274 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2276 gimple *use_stmt = USE_STMT (use_p);
2277 if (is_gimple_debug (use_stmt))
2278 continue;
2280 /* Check if we got back to the reduction phi. */
2281 if (use_stmt == phi)
2283 loop_use_stmt = use_stmt;
2284 found = true;
2285 break;
2288 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2290 loop_use_stmt = use_stmt;
2291 nloop_uses++;
2293 else
2294 n_out_of_loop_uses++;
2296 /* There can be either a single use in the loop or two uses in
2297 phi nodes. */
2298 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2299 return false;
2302 if (found)
2303 break;
2305 /* We reached a statement with no loop uses. */
2306 if (nloop_uses == 0)
2307 return false;
2309 /* This is a loop exit phi, and we haven't reached the reduction phi. */
2310 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2311 return false;
2313 if (!is_gimple_assign (loop_use_stmt)
2314 || code != gimple_assign_rhs_code (loop_use_stmt)
2315 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2316 return false;
2318 /* Insert USE_STMT into reduction chain. */
2319 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2320 if (current_stmt)
2322 current_stmt_info = vinfo_for_stmt (current_stmt);
2323 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2324 GROUP_FIRST_ELEMENT (use_stmt_info)
2325 = GROUP_FIRST_ELEMENT (current_stmt_info);
2327 else
2328 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2330 lhs = gimple_assign_lhs (loop_use_stmt);
2331 current_stmt = loop_use_stmt;
2332 size++;
2335 if (!found || loop_use_stmt != phi || size < 2)
2336 return false;
2338 /* Swap the operands, if needed, to make the reduction operand be the second
2339 operand. */
2340 lhs = PHI_RESULT (phi);
2341 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2342 while (next_stmt)
2344 if (gimple_assign_rhs2 (next_stmt) == lhs)
2346 tree op = gimple_assign_rhs1 (next_stmt);
2347 gimple *def_stmt = NULL;
2349 if (TREE_CODE (op) == SSA_NAME)
2350 def_stmt = SSA_NAME_DEF_STMT (op);
2352 /* Check that the other def is either defined in the loop
2353 ("vect_internal_def"), or it's an induction (defined by a
2354 loop-header phi-node). */
2355 if (def_stmt
2356 && gimple_bb (def_stmt)
2357 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2358 && (is_gimple_assign (def_stmt)
2359 || is_gimple_call (def_stmt)
2360 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2361 == vect_induction_def
2362 || (gimple_code (def_stmt) == GIMPLE_PHI
2363 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2364 == vect_internal_def
2365 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2367 lhs = gimple_assign_lhs (next_stmt);
2368 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2369 continue;
2372 return false;
2374 else
2376 tree op = gimple_assign_rhs2 (next_stmt);
2377 gimple *def_stmt = NULL;
2379 if (TREE_CODE (op) == SSA_NAME)
2380 def_stmt = SSA_NAME_DEF_STMT (op);
2382 /* Check that the other def is either defined in the loop
2383 ("vect_internal_def"), or it's an induction (defined by a
2384 loop-header phi-node). */
2385 if (def_stmt
2386 && gimple_bb (def_stmt)
2387 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2388 && (is_gimple_assign (def_stmt)
2389 || is_gimple_call (def_stmt)
2390 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2391 == vect_induction_def
2392 || (gimple_code (def_stmt) == GIMPLE_PHI
2393 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2394 == vect_internal_def
2395 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2397 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2400 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2401 dump_printf (MSG_NOTE, "\n");
2404 swap_ssa_operands (next_stmt,
2405 gimple_assign_rhs1_ptr (next_stmt),
2406 gimple_assign_rhs2_ptr (next_stmt));
2407 update_stmt (next_stmt);
2409 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2410 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2412 else
2413 return false;
2416 lhs = gimple_assign_lhs (next_stmt);
2417 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2420 /* Save the chain for further analysis in SLP detection. */
2421 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2422 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2423 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2425 return true;
2429 /* Function vect_is_simple_reduction_1
2431 (1) Detect a cross-iteration def-use cycle that represents a simple
2432 reduction computation. We look for the following pattern:
2434 loop_header:
2435 a1 = phi < a0, a2 >
2436 a3 = ...
2437 a2 = operation (a3, a1)
2441 a3 = ...
2442 loop_header:
2443 a1 = phi < a0, a2 >
2444 a2 = operation (a3, a1)
2446 such that:
2447 1. operation is commutative and associative and it is safe to
2448 change the order of the computation (if CHECK_REDUCTION is true)
2449 2. no uses for a2 in the loop (a2 is used out of the loop)
2450 3. no uses of a1 in the loop besides the reduction operation
2451 4. no uses of a1 outside the loop.
2453 Conditions 1,4 are tested here.
2454 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2456 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2457 nested cycles, if CHECK_REDUCTION is false.
2459 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2460 reductions:
2462 a1 = phi < a0, a2 >
2463 inner loop (def of a3)
2464 a2 = phi < a3 >
2466 (4) Detect condition expressions, i.e.:
2467 for (int i = 0; i < N; i++)
2468 if (a[i] < val)
2469 ret_val = a[i];
2471 If MODIFY is true it tries also to rework the code in-place to enable
2472 detection of more reduction patterns. For the time being we rewrite
2473 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
2476 static gimple *
2477 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
2478 bool check_reduction, bool *double_reduc,
2479 bool modify, bool need_wrapping_integral_overflow,
2480 enum vect_reduction_type *v_reduc_type)
2482 struct loop *loop = (gimple_bb (phi))->loop_father;
2483 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2484 edge latch_e = loop_latch_edge (loop);
2485 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2486 gimple *def_stmt, *def1 = NULL, *def2 = NULL;
2487 enum tree_code orig_code, code;
2488 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2489 tree type;
2490 int nloop_uses;
2491 tree name;
2492 imm_use_iterator imm_iter;
2493 use_operand_p use_p;
2494 bool phi_def;
2496 *double_reduc = false;
2497 *v_reduc_type = TREE_CODE_REDUCTION;
2499 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2500 otherwise, we assume outer loop vectorization. */
2501 gcc_assert ((check_reduction && loop == vect_loop)
2502 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2504 name = PHI_RESULT (phi);
2505 /* ??? If there are no uses of the PHI result the inner loop reduction
2506 won't be detected as possibly double-reduction by vectorizable_reduction
2507 because that tries to walk the PHI arg from the preheader edge which
2508 can be constant. See PR60382. */
2509 if (has_zero_uses (name))
2510 return NULL;
2511 nloop_uses = 0;
2512 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2514 gimple *use_stmt = USE_STMT (use_p);
2515 if (is_gimple_debug (use_stmt))
2516 continue;
2518 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "intermediate value used outside loop.\n");
2524 return NULL;
2527 nloop_uses++;
2528 if (nloop_uses > 1)
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "reduction used in loop.\n");
2533 return NULL;
2537 if (TREE_CODE (loop_arg) != SSA_NAME)
2539 if (dump_enabled_p ())
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2542 "reduction: not ssa_name: ");
2543 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2544 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2546 return NULL;
2549 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2550 if (!def_stmt)
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "reduction: no def_stmt.\n");
2555 return NULL;
2558 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2560 if (dump_enabled_p ())
2562 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2563 dump_printf (MSG_NOTE, "\n");
2565 return NULL;
2568 if (is_gimple_assign (def_stmt))
2570 name = gimple_assign_lhs (def_stmt);
2571 phi_def = false;
2573 else
2575 name = PHI_RESULT (def_stmt);
2576 phi_def = true;
2579 nloop_uses = 0;
2580 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2582 gimple *use_stmt = USE_STMT (use_p);
2583 if (is_gimple_debug (use_stmt))
2584 continue;
2585 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2586 nloop_uses++;
2587 if (nloop_uses > 1)
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "reduction used in loop.\n");
2592 return NULL;
2596 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2597 defined in the inner loop. */
2598 if (phi_def)
2600 op1 = PHI_ARG_DEF (def_stmt, 0);
2602 if (gimple_phi_num_args (def_stmt) != 1
2603 || TREE_CODE (op1) != SSA_NAME)
2605 if (dump_enabled_p ())
2606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2607 "unsupported phi node definition.\n");
2609 return NULL;
2612 def1 = SSA_NAME_DEF_STMT (op1);
2613 if (gimple_bb (def1)
2614 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2615 && loop->inner
2616 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2617 && is_gimple_assign (def1))
2619 if (dump_enabled_p ())
2620 report_vect_op (MSG_NOTE, def_stmt,
2621 "detected double reduction: ");
2623 *double_reduc = true;
2624 return def_stmt;
2627 return NULL;
2630 code = orig_code = gimple_assign_rhs_code (def_stmt);
2632 /* We can handle "res -= x[i]", which is non-associative by
2633 simply rewriting this into "res += -x[i]". Avoid changing
2634 gimple instruction for the first simple tests and only do this
2635 if we're allowed to change code at all. */
2636 if (code == MINUS_EXPR
2637 && modify
2638 && (op1 = gimple_assign_rhs1 (def_stmt))
2639 && TREE_CODE (op1) == SSA_NAME
2640 && SSA_NAME_DEF_STMT (op1) == phi)
2641 code = PLUS_EXPR;
2643 if (check_reduction)
2645 if (code == COND_EXPR)
2646 *v_reduc_type = COND_REDUCTION;
2647 else if (!commutative_tree_code (code) || !associative_tree_code (code))
2649 if (dump_enabled_p ())
2650 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2651 "reduction: not commutative/associative: ");
2652 return NULL;
2656 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2658 if (code != COND_EXPR)
2660 if (dump_enabled_p ())
2661 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2662 "reduction: not binary operation: ");
2664 return NULL;
2667 op3 = gimple_assign_rhs1 (def_stmt);
2668 if (COMPARISON_CLASS_P (op3))
2670 op4 = TREE_OPERAND (op3, 1);
2671 op3 = TREE_OPERAND (op3, 0);
2674 op1 = gimple_assign_rhs2 (def_stmt);
2675 op2 = gimple_assign_rhs3 (def_stmt);
2677 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2679 if (dump_enabled_p ())
2680 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2681 "reduction: uses not ssa_names: ");
2683 return NULL;
2686 else
2688 op1 = gimple_assign_rhs1 (def_stmt);
2689 op2 = gimple_assign_rhs2 (def_stmt);
2691 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2693 if (dump_enabled_p ())
2694 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2695 "reduction: uses not ssa_names: ");
2697 return NULL;
2701 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2702 if ((TREE_CODE (op1) == SSA_NAME
2703 && !types_compatible_p (type, TREE_TYPE (op1)))
2704 || (TREE_CODE (op2) == SSA_NAME
2705 && !types_compatible_p (type, TREE_TYPE (op2)))
2706 || (op3 && TREE_CODE (op3) == SSA_NAME
2707 && !types_compatible_p (type, TREE_TYPE (op3)))
2708 || (op4 && TREE_CODE (op4) == SSA_NAME
2709 && !types_compatible_p (type, TREE_TYPE (op4))))
2711 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_NOTE, vect_location,
2714 "reduction: multiple types: operation type: ");
2715 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2716 dump_printf (MSG_NOTE, ", operands types: ");
2717 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2718 TREE_TYPE (op1));
2719 dump_printf (MSG_NOTE, ",");
2720 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2721 TREE_TYPE (op2));
2722 if (op3)
2724 dump_printf (MSG_NOTE, ",");
2725 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2726 TREE_TYPE (op3));
2729 if (op4)
2731 dump_printf (MSG_NOTE, ",");
2732 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2733 TREE_TYPE (op4));
2735 dump_printf (MSG_NOTE, "\n");
2738 return NULL;
2741 /* Check that it's ok to change the order of the computation.
2742 Generally, when vectorizing a reduction we change the order of the
2743 computation. This may change the behavior of the program in some
2744 cases, so we need to check that this is ok. One exception is when
2745 vectorizing an outer-loop: the inner-loop is executed sequentially,
2746 and therefore vectorizing reductions in the inner-loop during
2747 outer-loop vectorization is safe. */
2749 if (*v_reduc_type != COND_REDUCTION)
2751 /* CHECKME: check for !flag_finite_math_only too? */
2752 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2753 && check_reduction)
2755 /* Changing the order of operations changes the semantics. */
2756 if (dump_enabled_p ())
2757 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2758 "reduction: unsafe fp math optimization: ");
2759 return NULL;
2761 else if (INTEGRAL_TYPE_P (type) && check_reduction)
2763 if (!operation_no_trapping_overflow (type, code))
2765 /* Changing the order of operations changes the semantics. */
2766 if (dump_enabled_p ())
2767 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2768 "reduction: unsafe int math optimization"
2769 " (overflow traps): ");
2770 return NULL;
2772 if (need_wrapping_integral_overflow
2773 && !TYPE_OVERFLOW_WRAPS (type)
2774 && operation_can_overflow (code))
2776 /* Changing the order of operations changes the semantics. */
2777 if (dump_enabled_p ())
2778 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2779 "reduction: unsafe int math optimization"
2780 " (overflow doesn't wrap): ");
2781 return NULL;
2784 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2786 /* Changing the order of operations changes the semantics. */
2787 if (dump_enabled_p ())
2788 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2789 "reduction: unsafe fixed-point math optimization: ");
2790 return NULL;
2794 /* If we detected "res -= x[i]" earlier, rewrite it into
2795 "res += -x[i]" now. If this turns out to be useless reassoc
2796 will clean it up again. */
2797 if (orig_code == MINUS_EXPR)
2799 tree rhs = gimple_assign_rhs2 (def_stmt);
2800 tree negrhs = make_ssa_name (TREE_TYPE (rhs));
2801 gimple *negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
2802 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2803 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2804 loop_info));
2805 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2806 gimple_assign_set_rhs2 (def_stmt, negrhs);
2807 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2808 update_stmt (def_stmt);
2811 /* Reduction is safe. We're dealing with one of the following:
2812 1) integer arithmetic and no trapv
2813 2) floating point arithmetic, and special flags permit this optimization
2814 3) nested cycle (i.e., outer loop vectorization). */
2815 if (TREE_CODE (op1) == SSA_NAME)
2816 def1 = SSA_NAME_DEF_STMT (op1);
2818 if (TREE_CODE (op2) == SSA_NAME)
2819 def2 = SSA_NAME_DEF_STMT (op2);
2821 if (code != COND_EXPR
2822 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2824 if (dump_enabled_p ())
2825 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2826 return NULL;
2829 /* Check that one def is the reduction def, defined by PHI,
2830 the other def is either defined in the loop ("vect_internal_def"),
2831 or it's an induction (defined by a loop-header phi-node). */
2833 if (def2 && def2 == phi
2834 && (code == COND_EXPR
2835 || !def1 || gimple_nop_p (def1)
2836 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2837 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2838 && (is_gimple_assign (def1)
2839 || is_gimple_call (def1)
2840 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2841 == vect_induction_def
2842 || (gimple_code (def1) == GIMPLE_PHI
2843 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2844 == vect_internal_def
2845 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2847 if (dump_enabled_p ())
2848 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2849 return def_stmt;
2852 if (def1 && def1 == phi
2853 && (code == COND_EXPR
2854 || !def2 || gimple_nop_p (def2)
2855 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2856 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2857 && (is_gimple_assign (def2)
2858 || is_gimple_call (def2)
2859 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2860 == vect_induction_def
2861 || (gimple_code (def2) == GIMPLE_PHI
2862 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2863 == vect_internal_def
2864 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2866 if (check_reduction)
2868 if (code == COND_EXPR)
2870 /* No currently known use where this case would be useful. */
2871 if (dump_enabled_p ())
2872 report_vect_op (MSG_NOTE, def_stmt,
2873 "detected reduction: cannot currently swap "
2874 "operands for cond_expr");
2875 return NULL;
2878 /* Swap operands (just for simplicity - so that the rest of the code
2879 can assume that the reduction variable is always the last (second)
2880 argument). */
2881 if (dump_enabled_p ())
2882 report_vect_op (MSG_NOTE, def_stmt,
2883 "detected reduction: need to swap operands: ");
2885 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2886 gimple_assign_rhs2_ptr (def_stmt));
2888 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2889 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2891 else
2893 if (dump_enabled_p ())
2894 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2897 return def_stmt;
2900 /* Try to find SLP reduction chain. */
2901 if (check_reduction && code != COND_EXPR
2902 && vect_is_slp_reduction (loop_info, phi, def_stmt))
2904 if (dump_enabled_p ())
2905 report_vect_op (MSG_NOTE, def_stmt,
2906 "reduction: detected reduction chain: ");
2908 return def_stmt;
2911 if (dump_enabled_p ())
2912 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2913 "reduction: unknown pattern: ");
2915 return NULL;
2918 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2919 in-place. Arguments as there. */
2921 static gimple *
2922 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2923 bool check_reduction, bool *double_reduc,
2924 bool need_wrapping_integral_overflow,
2925 enum vect_reduction_type *v_reduc_type)
2927 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2928 double_reduc, false,
2929 need_wrapping_integral_overflow,
2930 v_reduc_type);
2933 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2934 in-place if it enables detection of more reductions. Arguments
2935 as there. */
2937 gimple *
2938 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
2939 bool check_reduction, bool *double_reduc,
2940 bool need_wrapping_integral_overflow)
2942 enum vect_reduction_type v_reduc_type;
2943 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2944 double_reduc, true,
2945 need_wrapping_integral_overflow,
2946 &v_reduc_type);
2949 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
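/* For instance (hypothetical numbers): with NITERS known to be 103, a
   vectorization factor of 4 and PEEL_ITERS_PROLOGUE = 1, the epilogue gets
   (103 - 1) % 4 = 2 peeled iterations, and the scalar cost vector is charged
   once per peeled iteration into PROLOGUE_COST_VEC and EPILOGUE_COST_VEC.  */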
2951 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2952 int *peel_iters_epilogue,
2953 stmt_vector_for_cost *scalar_cost_vec,
2954 stmt_vector_for_cost *prologue_cost_vec,
2955 stmt_vector_for_cost *epilogue_cost_vec)
2957 int retval = 0;
2958 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2960 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2962 *peel_iters_epilogue = vf/2;
2963 if (dump_enabled_p ())
2964 dump_printf_loc (MSG_NOTE, vect_location,
2965 "cost model: epilogue peel iters set to vf/2 "
2966 "because loop iterations are unknown .\n");
2968 /* If peeled iterations are known but the number of scalar loop
2969 iterations is unknown, count a taken branch per peeled loop. */
2970 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2971 NULL, 0, vect_prologue);
2972 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2973 NULL, 0, vect_epilogue);
2975 else
2977 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2978 peel_iters_prologue = niters < peel_iters_prologue ?
2979 niters : peel_iters_prologue;
2980 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2981 /* If we need to peel for gaps, but no peeling is required, we have to
2982 peel VF iterations. */
2983 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2984 *peel_iters_epilogue = vf;
2987 stmt_info_for_cost *si;
2988 int j;
2989 if (peel_iters_prologue)
2990 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2991 retval += record_stmt_cost (prologue_cost_vec,
2992 si->count * peel_iters_prologue,
2993 si->kind, NULL, si->misalign,
2994 vect_prologue);
2995 if (*peel_iters_epilogue)
2996 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2997 retval += record_stmt_cost (epilogue_cost_vec,
2998 si->count * *peel_iters_epilogue,
2999 si->kind, NULL, si->misalign,
3000 vect_epilogue);
3002 return retval;
3005 /* Function vect_estimate_min_profitable_iters
3007 Return the number of iterations required for the vector version of the
3008 loop to be profitable relative to the cost of the scalar version of the
3009 loop. */
3011 static void
3012 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3013 int *ret_min_profitable_niters,
3014 int *ret_min_profitable_estimate)
3016 int min_profitable_iters;
3017 int min_profitable_estimate;
3018 int peel_iters_prologue;
3019 int peel_iters_epilogue;
3020 unsigned vec_inside_cost = 0;
3021 int vec_outside_cost = 0;
3022 unsigned vec_prologue_cost = 0;
3023 unsigned vec_epilogue_cost = 0;
3024 int scalar_single_iter_cost = 0;
3025 int scalar_outside_cost = 0;
3026 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3027 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3028 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3030 /* Cost model disabled. */
3031 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3033 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3034 *ret_min_profitable_niters = 0;
3035 *ret_min_profitable_estimate = 0;
3036 return;
3039 /* Requires loop versioning tests to handle misalignment. */
3040 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3042 /* FIXME: Make cost depend on complexity of individual check. */
3043 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3044 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3045 vect_prologue);
3046 dump_printf (MSG_NOTE,
3047 "cost model: Adding cost of checks for loop "
3048 "versioning to treat misalignment.\n");
3051 /* Requires loop versioning with alias checks. */
3052 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3054 /* FIXME: Make cost depend on complexity of individual check. */
3055 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3056 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3057 vect_prologue);
3058 dump_printf (MSG_NOTE,
3059 "cost model: Adding cost of checks for loop "
3060 "versioning aliasing.\n");
3063 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3064 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3065 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3066 vect_prologue);
3068 /* Count statements in scalar loop. Using this as scalar cost for a single
3069 iteration for now.
3071 TODO: Add outer loop support.
3073 TODO: Consider assigning different costs to different scalar
3074 statements. */
3076 scalar_single_iter_cost
3077 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3079 /* Add additional cost for the peeled instructions in prologue and epilogue
3080 loop.
3082 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3083 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3085 TODO: Build an expression that represents peel_iters for prologue and
3086 epilogue to be used in a run-time test. */
3088 if (npeel < 0)
3090 peel_iters_prologue = vf/2;
3091 dump_printf (MSG_NOTE, "cost model: "
3092 "prologue peel iters set to vf/2.\n");
3094 /* If peeling for alignment is unknown, the loop bound of the main loop
3095 becomes unknown. */
3096 peel_iters_epilogue = vf/2;
3097 dump_printf (MSG_NOTE, "cost model: "
3098 "epilogue peel iters set to vf/2 because "
3099 "peeling for alignment is unknown.\n");
3101 /* If peeled iterations are unknown, count a taken branch and a not taken
3102 branch per peeled loop. Even if scalar loop iterations are known,
3103 vector iterations are not known since peeled prologue iterations are
3104 not known. Hence guards remain the same. */
3105 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3106 NULL, 0, vect_prologue);
3107 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3108 NULL, 0, vect_prologue);
3109 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3110 NULL, 0, vect_epilogue);
3111 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3112 NULL, 0, vect_epilogue);
3113 stmt_info_for_cost *si;
3114 int j;
3115 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3117 struct _stmt_vec_info *stmt_info
3118 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3119 (void) add_stmt_cost (target_cost_data,
3120 si->count * peel_iters_prologue,
3121 si->kind, stmt_info, si->misalign,
3122 vect_prologue);
3123 (void) add_stmt_cost (target_cost_data,
3124 si->count * peel_iters_epilogue,
3125 si->kind, stmt_info, si->misalign,
3126 vect_epilogue);
3129 else
3131 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3132 stmt_info_for_cost *si;
3133 int j;
3134 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3136 prologue_cost_vec.create (2);
3137 epilogue_cost_vec.create (2);
3138 peel_iters_prologue = npeel;
3140 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3141 &peel_iters_epilogue,
3142 &LOOP_VINFO_SCALAR_ITERATION_COST
3143 (loop_vinfo),
3144 &prologue_cost_vec,
3145 &epilogue_cost_vec);
3147 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3149 struct _stmt_vec_info *stmt_info
3150 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3151 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3152 si->misalign, vect_prologue);
3155 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3157 struct _stmt_vec_info *stmt_info
3158 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3159 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3160 si->misalign, vect_epilogue);
3163 prologue_cost_vec.release ();
3164 epilogue_cost_vec.release ();
3167 /* FORNOW: The scalar outside cost is incremented in one of the
3168 following ways:
3170 1. The vectorizer checks for alignment and aliasing and generates
3171 a condition that allows dynamic vectorization. A cost model
3172 check is ANDED with the versioning condition. Hence scalar code
3173 path now has the added cost of the versioning check.
3175 if (cost > th & versioning_check)
3176 jmp to vector code
3178 Hence run-time scalar is incremented by not-taken branch cost.
3180 2. The vectorizer then checks if a prologue is required. If the
3181 cost model check was not done before during versioning, it has to
3182 be done before the prologue check.
3184 if (cost <= th)
3185 prologue = scalar_iters
3186 if (prologue == 0)
3187 jmp to vector code
3188 else
3189 execute prologue
3190 if (prologue == num_iters)
3191 go to exit
3193 Hence the run-time scalar cost is incremented by a taken branch,
3194 plus a not-taken branch, plus a taken branch cost.
3196 3. The vectorizer then checks if an epilogue is required. If the
3197 cost model check was not done before during prologue check, it
3198 has to be done with the epilogue check.
3200 if (prologue == 0)
3201 jmp to vector code
3202 else
3203 execute prologue
3204 if (prologue == num_iters)
3205 go to exit
3206 vector code:
3207 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3208 jmp to epilogue
3210 Hence the run-time scalar cost should be incremented by 2 taken
3211 branches.
3213 TODO: The back end may reorder the BBS's differently and reverse
3214 conditions/branch directions. Change the estimates below to
3215 something more reasonable. */
3217 /* If the number of iterations is known and we do not do versioning, we can
3218 decide whether to vectorize at compile time. Hence the scalar version
3219 does not carry cost model guard costs.
3220 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3221 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3222 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3224 /* Cost model check occurs at versioning. */
3225 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3226 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3227 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3228 else
3230 /* Cost model check occurs at prologue generation. */
3231 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3232 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3233 + vect_get_stmt_cost (cond_branch_not_taken);
3234 /* Cost model check occurs at epilogue generation. */
3235 else
3236 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3240 /* Complete the target-specific cost calculations. */
3241 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3242 &vec_inside_cost, &vec_epilogue_cost);
3244 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3246 if (dump_enabled_p ())
3248 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3249 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3250 vec_inside_cost);
3251 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3252 vec_prologue_cost);
3253 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3254 vec_epilogue_cost);
3255 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3256 scalar_single_iter_cost);
3257 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3258 scalar_outside_cost);
3259 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3260 vec_outside_cost);
3261 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3262 peel_iters_prologue);
3263 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3264 peel_iters_epilogue);
3267 /* Calculate number of iterations required to make the vector version
3268 profitable, relative to the loop bodies only. The following condition
3269 must hold true:
3270 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3271 where
3272 SIC = scalar iteration cost, VIC = vector iteration cost,
3273 VOC = vector outside cost, VF = vectorization factor,
3274 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3275 SOC = scalar outside cost for run time cost model check. */
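/* As an illustration (hypothetical costs): with SIC = 4, VIC = 8, VF = 4,
   VOC = 40, SOC = 0 and no peeling, the vector variant costs roughly
   8 * (niters / 4) + 40 = 2 * niters + 40 against 4 * niters for the scalar
   loop, so the computation below gives (40 * 4) / (4 * 4 - 8) = 20, bumped
   to 21 by the rounding check that follows.  */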
3277 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3279 if (vec_outside_cost <= 0)
3280 min_profitable_iters = 1;
3281 else
3283 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3284 - vec_inside_cost * peel_iters_prologue
3285 - vec_inside_cost * peel_iters_epilogue)
3286 / ((scalar_single_iter_cost * vf)
3287 - vec_inside_cost);
3289 if ((scalar_single_iter_cost * vf * min_profitable_iters)
3290 <= (((int) vec_inside_cost * min_profitable_iters)
3291 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3292 min_profitable_iters++;
3295 /* vector version will never be profitable. */
3296 else
3298 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3299 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3300 "did not happen for a simd loop");
3302 if (dump_enabled_p ())
3303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3304 "cost model: the vector iteration cost = %d "
3305 "divided by the scalar iteration cost = %d "
3306 "is greater or equal to the vectorization factor = %d"
3307 ".\n",
3308 vec_inside_cost, scalar_single_iter_cost, vf);
3309 *ret_min_profitable_niters = -1;
3310 *ret_min_profitable_estimate = -1;
3311 return;
3314 dump_printf (MSG_NOTE,
3315 " Calculated minimum iters for profitability: %d\n",
3316 min_profitable_iters);
3318 min_profitable_iters =
3319 min_profitable_iters < vf ? vf : min_profitable_iters;
3321 /* Because the condition we create is:
3322 if (niters <= min_profitable_iters)
3323 then skip the vectorized loop. */
3324 min_profitable_iters--;
3326 if (dump_enabled_p ())
3327 dump_printf_loc (MSG_NOTE, vect_location,
3328 " Runtime profitability threshold = %d\n",
3329 min_profitable_iters);
3331 *ret_min_profitable_niters = min_profitable_iters;
3333 /* Calculate number of iterations required to make the vector version
3334 profitable, relative to the loop bodies only.
3336 Non-vectorized variant is SIC * niters and it must win over vector
3337 variant on the expected loop trip count. The following condition must hold true:
3338 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3340 if (vec_outside_cost <= 0)
3341 min_profitable_estimate = 1;
3342 else
3344 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3345 - vec_inside_cost * peel_iters_prologue
3346 - vec_inside_cost * peel_iters_epilogue)
3347 / ((scalar_single_iter_cost * vf)
3348 - vec_inside_cost);
3350 min_profitable_estimate --;
3351 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3352 if (dump_enabled_p ())
3353 dump_printf_loc (MSG_NOTE, vect_location,
3354 " Static estimate profitability threshold = %d\n",
3355 min_profitable_estimate);
3357 *ret_min_profitable_estimate = min_profitable_estimate;
3360 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3361 vector elements (not bits) for a vector of mode MODE. */
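/* For example, for a 4-element mode and OFFSET = 1 the mask is
   { 1, 2, 3, 4 }: elements 1..3 of the first operand followed by element 0
   of the second operand, i.e. a whole-vector shift by one element when the
   second operand is zero.  */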
3362 static void
3363 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3364 unsigned char *sel)
3366 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3368 for (i = 0; i < nelt; i++)
3369 sel[i] = (i + offset) & (2*nelt - 1);
3372 /* Checks whether the target supports whole-vector shifts for vectors of mode
3373 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3374 it supports vec_perm_const with masks for all necessary shift amounts. */
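/* For example, for an 8-element mode the loop below asks whether shifts by
   4, 2 and 1 elements can all be open-coded as constant permutes.  */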
3375 static bool
3376 have_whole_vector_shift (enum machine_mode mode)
3378 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3379 return true;
3381 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3382 return false;
3384 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3385 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3387 for (i = nelt/2; i >= 1; i/=2)
3389 calc_vec_perm_mask_for_shift (mode, i, sel);
3390 if (!can_vec_perm_p (mode, false, sel))
3391 return false;
3393 return true;
3396 /* Return the reduction operand (with index REDUC_INDEX) of STMT. */
3398 static tree
3399 get_reduction_op (gimple *stmt, int reduc_index)
3401 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3403 case GIMPLE_SINGLE_RHS:
3404 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3405 == ternary_op);
3406 return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3407 case GIMPLE_UNARY_RHS:
3408 return gimple_assign_rhs1 (stmt);
3409 case GIMPLE_BINARY_RHS:
3410 return (reduc_index
3411 ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3412 case GIMPLE_TERNARY_RHS:
3413 return gimple_op (stmt, reduc_index + 1);
3414 default:
3415 gcc_unreachable ();
3419 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3420 functions. Design better to avoid maintenance issues. */
3422 /* Function vect_model_reduction_cost.
3424 Models cost for a reduction operation, including the vector ops
3425 generated within the strip-mine loop, the initial definition before
3426 the loop, and the epilogue code that must be generated. */
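/* For a plain (non-COND_REDUCTION) integer sum with NCOPIES == 1 and a
   target-supported REDUC_CODE, this works out to one vector_stmt in the
   loop body, one scalar_to_vec in the prologue, and one vector_stmt plus
   one vec_to_scalar in the epilogue (illustrative reading of the code
   below).  */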
3428 static bool
3429 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3430 int ncopies, int reduc_index)
3432 int prologue_cost = 0, epilogue_cost = 0;
3433 enum tree_code code;
3434 optab optab;
3435 tree vectype;
3436 gimple *stmt, *orig_stmt;
3437 tree reduction_op;
3438 machine_mode mode;
3439 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3440 struct loop *loop = NULL;
3441 void *target_cost_data;
3443 if (loop_vinfo)
3445 loop = LOOP_VINFO_LOOP (loop_vinfo);
3446 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3448 else
3449 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3451 /* Condition reductions generate two reductions in the loop. */
3452 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3453 ncopies *= 2;
3455 /* Cost of reduction op inside loop. */
3456 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3457 stmt_info, 0, vect_body);
3458 stmt = STMT_VINFO_STMT (stmt_info);
3460 reduction_op = get_reduction_op (stmt, reduc_index);
3462 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3463 if (!vectype)
3465 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "unsupported data-type ");
3469 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3470 TREE_TYPE (reduction_op));
3471 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3473 return false;
3476 mode = TYPE_MODE (vectype);
3477 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3479 if (!orig_stmt)
3480 orig_stmt = STMT_VINFO_STMT (stmt_info);
3482 code = gimple_assign_rhs_code (orig_stmt);
3484 /* Add in cost for initial definition.
3485 For cond reduction we have four vectors: initial index, step, initial
3486 result of the data reduction, initial value of the index reduction. */
3487 int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3488 == COND_REDUCTION ? 4 : 1;
3489 prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3490 scalar_to_vec, stmt_info, 0,
3491 vect_prologue);
3493 /* Determine cost of epilogue code.
3495 We have a reduction operator that will reduce the vector in one statement.
3496 Also requires scalar extract. */
3498 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3500 if (reduc_code != ERROR_MARK)
3502 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3504 /* An EQ stmt and a COND_EXPR stmt. */
3505 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3506 vector_stmt, stmt_info, 0,
3507 vect_epilogue);
3508 /* Reduction of the max index and a reduction of the found
3509 values. */
3510 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3511 vec_to_scalar, stmt_info, 0,
3512 vect_epilogue);
3513 /* A broadcast of the max value. */
3514 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3515 scalar_to_vec, stmt_info, 0,
3516 vect_epilogue);
3518 else
3520 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3521 stmt_info, 0, vect_epilogue);
3522 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3523 vec_to_scalar, stmt_info, 0,
3524 vect_epilogue);
3527 else
3529 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3530 tree bitsize =
3531 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3532 int element_bitsize = tree_to_uhwi (bitsize);
3533 int nelements = vec_size_in_bits / element_bitsize;
3535 optab = optab_for_tree_code (code, vectype, optab_default);
3537 /* We have a whole vector shift available. */
3538 if (VECTOR_MODE_P (mode)
3539 && optab_handler (optab, mode) != CODE_FOR_nothing
3540 && have_whole_vector_shift (mode))
3542 /* Final reduction via vector shifts and the reduction operator.
3543 Also requires scalar extract. */
3544 epilogue_cost += add_stmt_cost (target_cost_data,
3545 exact_log2 (nelements) * 2,
3546 vector_stmt, stmt_info, 0,
3547 vect_epilogue);
3548 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3549 vec_to_scalar, stmt_info, 0,
3550 vect_epilogue);
3552 else
3553 /* Use extracts and reduction op for final reduction. For N
3554 elements, we have N extracts and N-1 reduction ops. */
3555 epilogue_cost += add_stmt_cost (target_cost_data,
3556 nelements + nelements - 1,
3557 vector_stmt, stmt_info, 0,
3558 vect_epilogue);
3562 if (dump_enabled_p ())
3563 dump_printf (MSG_NOTE,
3564 "vect_model_reduction_cost: inside_cost = %d, "
3565 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3566 prologue_cost, epilogue_cost);
3568 return true;
3572 /* Function vect_model_induction_cost.
3574 Models cost for induction operations. */
3576 static void
3577 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3579 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3580 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3581 unsigned inside_cost, prologue_cost;
3583 /* loop cost for vec_loop. */
3584 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3585 stmt_info, 0, vect_body);
3587 /* prologue cost for vec_init and vec_step. */
3588 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3589 stmt_info, 0, vect_prologue);
3591 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_NOTE, vect_location,
3593 "vect_model_induction_cost: inside_cost = %d, "
3594 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3598 /* Function get_initial_def_for_induction
3600 Input:
3601 STMT - a stmt that performs an induction operation in the loop.
3602 IV_PHI - the initial value of the induction variable
3604 Output:
3605 Return a vector variable, initialized with the first VF values of
3606 the induction variable. E.g., for an iv with IV_PHI='X' and
3607 evolution S, for a vector of 4 units, we want to return:
3608 [X, X + S, X + 2*S, X + 3*S]. */
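/* For example (illustrative), an iv with initial value X = 5, step S = 3
   and a 4-unit vector type gives vec_init = { 5, 8, 11, 14 }; in the
   non-nested case the corresponding vec_step is { 12, 12, 12, 12 }
   (VF * S), so each vector iteration advances every lane by a full
   vector's worth of scalar steps.  */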
3610 static tree
3611 get_initial_def_for_induction (gimple *iv_phi)
3613 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3614 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3616 tree vectype;
3617 int nunits;
3618 edge pe = loop_preheader_edge (loop);
3619 struct loop *iv_loop;
3620 basic_block new_bb;
3621 tree new_vec, vec_init, vec_step, t;
3622 tree new_name;
3623 gimple *new_stmt;
3624 gphi *induction_phi;
3625 tree induc_def, vec_def, vec_dest;
3626 tree init_expr, step_expr;
3627 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3628 int i;
3629 int ncopies;
3630 tree expr;
3631 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3632 bool nested_in_vect_loop = false;
3633 gimple_seq stmts;
3634 imm_use_iterator imm_iter;
3635 use_operand_p use_p;
3636 gimple *exit_phi;
3637 edge latch_e;
3638 tree loop_arg;
3639 gimple_stmt_iterator si;
3640 basic_block bb = gimple_bb (iv_phi);
3641 tree stepvectype;
3642 tree resvectype;
3644 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3645 if (nested_in_vect_loop_p (loop, iv_phi))
3647 nested_in_vect_loop = true;
3648 iv_loop = loop->inner;
3650 else
3651 iv_loop = loop;
3652 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3654 latch_e = loop_latch_edge (iv_loop);
3655 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3657 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3658 gcc_assert (step_expr != NULL_TREE);
3660 pe = loop_preheader_edge (iv_loop);
3661 init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3662 loop_preheader_edge (iv_loop));
3664 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3665 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3666 gcc_assert (vectype);
3667 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3668 ncopies = vf / nunits;
3670 gcc_assert (phi_info);
3671 gcc_assert (ncopies >= 1);
3673 /* Convert the step to the desired type. */
3674 stmts = NULL;
3675 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3676 if (stmts)
3678 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3679 gcc_assert (!new_bb);
3682 /* Find the first insertion point in the BB. */
3683 si = gsi_after_labels (bb);
3685 /* Create the vector that holds the initial_value of the induction. */
3686 if (nested_in_vect_loop)
3688 /* iv_loop is nested in the loop to be vectorized. init_expr has already
3689 been created during vectorization of previous stmts. We obtain it
3690 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3691 vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3692 /* If the initial value is not of proper type, convert it. */
3693 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3695 new_stmt
3696 = gimple_build_assign (vect_get_new_ssa_name (vectype,
3697 vect_simple_var,
3698 "vec_iv_"),
3699 VIEW_CONVERT_EXPR,
3700 build1 (VIEW_CONVERT_EXPR, vectype,
3701 vec_init));
3702 vec_init = gimple_assign_lhs (new_stmt);
3703 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3704 new_stmt);
3705 gcc_assert (!new_bb);
3706 set_vinfo_for_stmt (new_stmt,
3707 new_stmt_vec_info (new_stmt, loop_vinfo));
3710 else
3712 vec<constructor_elt, va_gc> *v;
3714 /* iv_loop is the loop to be vectorized. Create:
3715 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3716 stmts = NULL;
3717 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3719 vec_alloc (v, nunits);
3720 bool constant_p = is_gimple_min_invariant (new_name);
3721 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3722 for (i = 1; i < nunits; i++)
3724 /* Create: new_name_i = new_name + step_expr */
3725 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3726 new_name, step_expr);
3727 if (!is_gimple_min_invariant (new_name))
3728 constant_p = false;
3729 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3731 if (stmts)
3733 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3734 gcc_assert (!new_bb);
3737 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3738 if (constant_p)
3739 new_vec = build_vector_from_ctor (vectype, v);
3740 else
3741 new_vec = build_constructor (vectype, v);
3742 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3746 /* Create the vector that holds the step of the induction. */
3747 if (nested_in_vect_loop)
3748 /* iv_loop is nested in the loop to be vectorized. Generate:
3749 vec_step = [S, S, S, S] */
3750 new_name = step_expr;
3751 else
3753 /* iv_loop is the loop to be vectorized. Generate:
3754 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3755 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3757 expr = build_int_cst (integer_type_node, vf);
3758 expr = fold_convert (TREE_TYPE (step_expr), expr);
3760 else
3761 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3762 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3763 expr, step_expr);
3764 if (TREE_CODE (step_expr) == SSA_NAME)
3765 new_name = vect_init_vector (iv_phi, new_name,
3766 TREE_TYPE (step_expr), NULL);
3769 t = unshare_expr (new_name);
3770 gcc_assert (CONSTANT_CLASS_P (new_name)
3771 || TREE_CODE (new_name) == SSA_NAME);
3772 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3773 gcc_assert (stepvectype);
3774 new_vec = build_vector_from_val (stepvectype, t);
3775 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3778 /* Create the following def-use cycle:
3779 loop prolog:
3780 vec_init = ...
3781 vec_step = ...
3782 loop:
3783 vec_iv = PHI <vec_init, vec_loop>
3785 STMT
3787 vec_loop = vec_iv + vec_step; */
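/* For illustration (a sketch only, assuming an integer induction with
X = 0, S = 1 and VF = nunits = 4), the cycle created below is:
loop prolog:
vec_init = { 0, 1, 2, 3 }
vec_step = { 4, 4, 4, 4 }
loop:
vec_iv = PHI <vec_init (preheader), vec_loop (latch)>
...
vec_loop = vec_iv + vec_step */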
3789 /* Create the induction-phi that defines the induction-operand. */
3790 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3791 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3792 set_vinfo_for_stmt (induction_phi,
3793 new_stmt_vec_info (induction_phi, loop_vinfo));
3794 induc_def = PHI_RESULT (induction_phi);
3796 /* Create the iv update inside the loop */
3797 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3798 vec_def = make_ssa_name (vec_dest, new_stmt);
3799 gimple_assign_set_lhs (new_stmt, vec_def);
3800 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3801 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
3803 /* Set the arguments of the phi node: */
3804 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3805 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3806 UNKNOWN_LOCATION);
3809 /* In case the vectorization factor (VF) is bigger than the number
3810 of elements that we can fit in a vectype (nunits), we have to generate
3811 more than one vector stmt - i.e. - we need to "unroll" the
3812 vector stmt by a factor of VF/nunits. For more details see the
3813 documentation in vectorizable_operation. */
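/* A sketch of that unrolled case (illustrative; assuming VF = 8,
nunits = 4, hence ncopies = 2, and scalar step S): each extra copy is
obtained from the previous one by adding a step of nunits * S:
vec_iv_0 = PHI <vec_init, vec_loop>              elements i .. i+3
vec_iv_1 = vec_iv_0 + { 4*S, 4*S, 4*S, 4*S }     elements i+4 .. i+7 */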
3815 if (ncopies > 1)
3817 stmt_vec_info prev_stmt_vinfo;
3818 /* FORNOW. This restriction should be relaxed. */
3819 gcc_assert (!nested_in_vect_loop);
3821 /* Create the vector that holds the step of the induction. */
3822 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3824 expr = build_int_cst (integer_type_node, nunits);
3825 expr = fold_convert (TREE_TYPE (step_expr), expr);
3827 else
3828 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3829 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3830 expr, step_expr);
3831 if (TREE_CODE (step_expr) == SSA_NAME)
3832 new_name = vect_init_vector (iv_phi, new_name,
3833 TREE_TYPE (step_expr), NULL);
3834 t = unshare_expr (new_name);
3835 gcc_assert (CONSTANT_CLASS_P (new_name)
3836 || TREE_CODE (new_name) == SSA_NAME);
3837 new_vec = build_vector_from_val (stepvectype, t);
3838 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3840 vec_def = induc_def;
3841 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3842 for (i = 1; i < ncopies; i++)
3844 /* vec_i = vec_prev + vec_step */
3845 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3846 vec_def, vec_step);
3847 vec_def = make_ssa_name (vec_dest, new_stmt);
3848 gimple_assign_set_lhs (new_stmt, vec_def);
3850 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3851 if (!useless_type_conversion_p (resvectype, vectype))
3853 new_stmt
3854 = gimple_build_assign
3855 (vect_get_new_vect_var (resvectype, vect_simple_var,
3856 "vec_iv_"),
3857 VIEW_CONVERT_EXPR,
3858 build1 (VIEW_CONVERT_EXPR, resvectype,
3859 gimple_assign_lhs (new_stmt)));
3860 gimple_assign_set_lhs (new_stmt,
3861 make_ssa_name
3862 (gimple_assign_lhs (new_stmt), new_stmt));
3863 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3865 set_vinfo_for_stmt (new_stmt,
3866 new_stmt_vec_info (new_stmt, loop_vinfo));
3867 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3868 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3872 if (nested_in_vect_loop)
3874 /* Find the loop-closed exit-phi of the induction, and record
3875 the final vector of induction results: */
3876 exit_phi = NULL;
3877 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3879 gimple *use_stmt = USE_STMT (use_p);
3880 if (is_gimple_debug (use_stmt))
3881 continue;
3883 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3885 exit_phi = use_stmt;
3886 break;
3889 if (exit_phi)
3891 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3892 /* FORNOW. Currently not supporting the case that an inner-loop induction
3893 is not used in the outer-loop (i.e. it is used only outside the outer-loop). */
3894 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3895 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3897 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3898 if (dump_enabled_p ())
3900 dump_printf_loc (MSG_NOTE, vect_location,
3901 "vector of inductions after inner-loop:");
3902 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3903 dump_printf (MSG_NOTE, "\n");
3909 if (dump_enabled_p ())
3911 dump_printf_loc (MSG_NOTE, vect_location,
3912 "transform induction: created def-use cycle: ");
3913 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3914 dump_printf (MSG_NOTE, "\n");
3915 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3916 SSA_NAME_DEF_STMT (vec_def), 0);
3917 dump_printf (MSG_NOTE, "\n");
3920 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3921 if (!useless_type_conversion_p (resvectype, vectype))
3923 new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3924 vect_simple_var,
3925 "vec_iv_"),
3926 VIEW_CONVERT_EXPR,
3927 build1 (VIEW_CONVERT_EXPR, resvectype,
3928 induc_def));
3929 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3930 gimple_assign_set_lhs (new_stmt, induc_def);
3931 si = gsi_after_labels (bb);
3932 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3933 set_vinfo_for_stmt (new_stmt,
3934 new_stmt_vec_info (new_stmt, loop_vinfo));
3935 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3936 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3939 return induc_def;
3943 /* Function get_initial_def_for_reduction
3945 Input:
3946 STMT - a stmt that performs a reduction operation in the loop.
3947 INIT_VAL - the initial value of the reduction variable
3949 Output:
3950 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3951 of the reduction (used for adjusting the epilog - see below).
3952 Return a vector variable, initialized according to the operation that STMT
3953 performs. This vector will be used as the initial value of the
3954 vector of partial results.
3956 Option1 (adjust in epilog): Initialize the vector as follows:
3957 add/bit or/xor: [0,0,...,0,0]
3958 mult/bit and: [1,1,...,1,1]
3959 min/max/cond_expr: [init_val,init_val,...,init_val,init_val]
3960 and when necessary (e.g. add/mult case) let the caller know
3961 that it needs to adjust the result by init_val.
3963 Option2: Initialize the vector as follows:
3964 add/bit or/xor: [init_val,0,0,...,0]
3965 mult/bit and: [init_val,1,1,...,1]
3966 min/max/cond_expr: [init_val,init_val,...,init_val]
3967 and no adjustments are needed.
3969 For example, for the following code:
3971 s = init_val;
3972 for (i=0;i<n;i++)
3973 s = s + a[i];
3975 STMT is 's = s + a[i]', and the reduction variable is 's'.
3976 For a vector of 4 units, we want to return either [0,0,0,init_val],
3977 or [0,0,0,0] and let the caller know that it needs to adjust
3978 the result at the end by 'init_val'.
3980 FORNOW, we use the 'adjust in epilog' scheme (Option1) when ADJUSTMENT_DEF
3981 is not NULL, because this way the initialization vector is simpler (the
3982 same element in all entries), and Option2 otherwise.
3984 A cost model should help decide between these two schemes. */
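/* A concrete sketch (illustrative only) for a PLUS reduction with
init_val = 5 and a 4-element vector:
Option1: return [0,0,0,0] and set *ADJUSTMENT_DEF to 5, so that the
epilog adds 5 to the reduced result;
Option2: return [5,0,0,0], and no adjustment is needed. */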
3986 tree
3987 get_initial_def_for_reduction (gimple *stmt, tree init_val,
3988 tree *adjustment_def)
3990 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3991 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3992 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3993 tree scalar_type = TREE_TYPE (init_val);
3994 tree vectype = get_vectype_for_scalar_type (scalar_type);
3995 int nunits;
3996 enum tree_code code = gimple_assign_rhs_code (stmt);
3997 tree def_for_init;
3998 tree init_def;
3999 tree *elts;
4000 int i;
4001 bool nested_in_vect_loop = false;
4002 tree init_value;
4003 REAL_VALUE_TYPE real_init_val = dconst0;
4004 int int_init_val = 0;
4005 gimple *def_stmt = NULL;
4007 gcc_assert (vectype);
4008 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4010 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4011 || SCALAR_FLOAT_TYPE_P (scalar_type));
4013 if (nested_in_vect_loop_p (loop, stmt))
4014 nested_in_vect_loop = true;
4015 else
4016 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
4018 /* In case of double reduction we only create a vector variable to be put
4019 in the reduction phi node. The actual statement creation is done in
4020 vect_create_epilog_for_reduction. */
4021 if (adjustment_def && nested_in_vect_loop
4022 && TREE_CODE (init_val) == SSA_NAME
4023 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
4024 && gimple_code (def_stmt) == GIMPLE_PHI
4025 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4026 && vinfo_for_stmt (def_stmt)
4027 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4028 == vect_double_reduction_def)
4030 *adjustment_def = NULL;
4031 return vect_create_destination_var (init_val, vectype);
4034 if (TREE_CONSTANT (init_val))
4036 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4037 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
4038 else
4039 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
4041 else
4042 init_value = init_val;
4044 switch (code)
4046 case WIDEN_SUM_EXPR:
4047 case DOT_PROD_EXPR:
4048 case SAD_EXPR:
4049 case PLUS_EXPR:
4050 case MINUS_EXPR:
4051 case BIT_IOR_EXPR:
4052 case BIT_XOR_EXPR:
4053 case MULT_EXPR:
4054 case BIT_AND_EXPR:
4055 /* ADJUSTMENT_DEF is NULL when called from
4056 vect_create_epilog_for_reduction to vectorize a double reduction. */
4057 if (adjustment_def)
4059 if (nested_in_vect_loop)
4060 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
4061 else
4062 *adjustment_def = init_val;
4065 if (code == MULT_EXPR)
4067 real_init_val = dconst1;
4068 int_init_val = 1;
4071 if (code == BIT_AND_EXPR)
4072 int_init_val = -1;
4074 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4075 def_for_init = build_real (scalar_type, real_init_val);
4076 else
4077 def_for_init = build_int_cst (scalar_type, int_init_val);
4079 /* Fill all vector elements but the first with '0' or '1'. */
4080 elts = XALLOCAVEC (tree, nunits);
4081 for (i = nunits - 2; i >= 0; --i)
4082 elts[i + 1] = def_for_init;
4084 /* Option1: the first element is '0' or '1' as well. */
4085 if (adjustment_def)
4087 elts[0] = def_for_init;
4088 init_def = build_vector (vectype, elts);
4089 break;
4092 /* Option2: the first element is INIT_VAL. */
4093 elts[0] = init_val;
4094 if (TREE_CONSTANT (init_val))
4095 init_def = build_vector (vectype, elts);
4096 else
4098 vec<constructor_elt, va_gc> *v;
4099 vec_alloc (v, nunits);
4100 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4101 for (i = 1; i < nunits; ++i)
4102 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4103 init_def = build_constructor (vectype, v);
4106 break;
4108 case MIN_EXPR:
4109 case MAX_EXPR:
4110 case COND_EXPR:
4111 if (adjustment_def)
4113 *adjustment_def = NULL_TREE;
4114 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4116 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4117 break;
4120 init_def = build_vector_from_val (vectype, init_value);
4121 break;
4123 default:
4124 gcc_unreachable ();
4127 return init_def;
4130 /* Function vect_create_epilog_for_reduction
4132 Create code at the loop-epilog to finalize the result of a reduction
4133 computation.
4135 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4136 reduction statements.
4137 STMT is the scalar reduction stmt that is being vectorized.
4138 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4139 number of elements that we can fit in a vectype (nunits). In this case
4140 we have to generate more than one vector stmt - i.e - we need to "unroll"
4141 the vector stmt by a factor VF/nunits. For more details see documentation
4142 in vectorizable_operation.
4143 REDUC_CODE is the tree-code for the epilog reduction.
4144 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4145 computation.
4146 REDUC_INDEX is the index of the operand in the right hand side of the
4147 statement that is defined by REDUCTION_PHI.
4148 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4149 SLP_NODE is an SLP node containing a group of reduction statements. The
4150 first one in this group is STMT.
4151 INDUCTION_INDEX is the index of the loop for condition reductions.
4152 Otherwise it is undefined.
4154 This function:
4155 1. Creates the reduction def-use cycles: sets the arguments for
4156 REDUCTION_PHIS:
4157 The loop-entry argument is the vectorized initial-value of the reduction.
4158 The loop-latch argument is taken from VECT_DEFS - the vector of partial
4159 sums.
4160 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4161 by applying the operation specified by REDUC_CODE if available, or by
4162 other means (whole-vector shifts or a scalar loop).
4163 The function also creates a new phi node at the loop exit to preserve
4164 loop-closed form, as illustrated below.
4166 The flow at the entry to this function:
4168 loop:
4169 vec_def = phi <null, null> # REDUCTION_PHI
4170 VECT_DEF = vector_stmt # vectorized form of STMT
4171 s_loop = scalar_stmt # (scalar) STMT
4172 loop_exit:
4173 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4174 use <s_out0>
4175 use <s_out0>
4177 The above is transformed by this function into:
4179 loop:
4180 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4181 VECT_DEF = vector_stmt # vectorized form of STMT
4182 s_loop = scalar_stmt # (scalar) STMT
4183 loop_exit:
4184 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4185 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4186 v_out2 = reduce <v_out1>
4187 s_out3 = extract_field <v_out2, 0>
4188 s_out4 = adjust_result <s_out3>
4189 use <s_out4>
4190 use <s_out4>
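As a source-level sketch (illustrative; assuming a simple integer sum,
VF = 4 and the 'adjust in epilog' scheme), the overall effect is that
s = init; for (i = 0; i < n; i++) s += a[i];
becomes roughly
vs = {0,0,0,0}; for (i = 0; i < n; i += 4) vs += *(v4si *) &a[i];
s = vs[0] + vs[1] + vs[2] + vs[3] + init;
where the last line corresponds to the epilog code created here. */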
4193 static void
4194 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4195 int ncopies, enum tree_code reduc_code,
4196 vec<gimple *> reduction_phis,
4197 int reduc_index, bool double_reduc,
4198 slp_tree slp_node, tree induction_index)
4200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4201 stmt_vec_info prev_phi_info;
4202 tree vectype;
4203 machine_mode mode;
4204 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4205 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4206 basic_block exit_bb;
4207 tree scalar_dest;
4208 tree scalar_type;
4209 gimple *new_phi = NULL, *phi;
4210 gimple_stmt_iterator exit_gsi;
4211 tree vec_dest;
4212 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4213 gimple *epilog_stmt = NULL;
4214 enum tree_code code = gimple_assign_rhs_code (stmt);
4215 gimple *exit_phi;
4216 tree bitsize;
4217 tree adjustment_def = NULL;
4218 tree vec_initial_def = NULL;
4219 tree reduction_op, expr, def, initial_def = NULL;
4220 tree orig_name, scalar_result;
4221 imm_use_iterator imm_iter, phi_imm_iter;
4222 use_operand_p use_p, phi_use_p;
4223 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4224 bool nested_in_vect_loop = false;
4225 auto_vec<gimple *> new_phis;
4226 auto_vec<gimple *> inner_phis;
4227 enum vect_def_type dt = vect_unknown_def_type;
4228 int j, i;
4229 auto_vec<tree> scalar_results;
4230 unsigned int group_size = 1, k, ratio;
4231 auto_vec<tree> vec_initial_defs;
4232 auto_vec<gimple *> phis;
4233 bool slp_reduc = false;
4234 tree new_phi_result;
4235 gimple *inner_phi = NULL;
4237 if (slp_node)
4238 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4240 if (nested_in_vect_loop_p (loop, stmt))
4242 outer_loop = loop;
4243 loop = loop->inner;
4244 nested_in_vect_loop = true;
4245 gcc_assert (!slp_node);
4248 reduction_op = get_reduction_op (stmt, reduc_index);
4250 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4251 gcc_assert (vectype);
4252 mode = TYPE_MODE (vectype);
4254 /* 1. Create the reduction def-use cycle:
4255 Set the arguments of REDUCTION_PHIS, i.e., transform
4257 loop:
4258 vec_def = phi <null, null> # REDUCTION_PHI
4259 VECT_DEF = vector_stmt # vectorized form of STMT
4262 into:
4264 loop:
4265 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4266 VECT_DEF = vector_stmt # vectorized form of STMT
4269 (in case of SLP, do it for all the phis). */
4271 /* Get the loop-entry arguments. */
4272 if (slp_node)
4273 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4274 NULL, slp_node, reduc_index);
4275 else
4277 /* Get at the scalar def before the loop, that defines the initial value
4278 of the reduction variable. */
4279 gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4280 initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4281 loop_preheader_edge (loop));
4282 vec_initial_defs.create (1);
4283 vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4284 &adjustment_def);
4285 vec_initial_defs.quick_push (vec_initial_def);
4288 /* Set phi nodes arguments. */
4289 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4291 tree vec_init_def, def;
4292 gimple_seq stmts;
4293 vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4294 true, NULL_TREE);
4295 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4296 def = vect_defs[i];
4297 for (j = 0; j < ncopies; j++)
4299 /* Set the loop-entry arg of the reduction-phi. */
4301 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4302 == INTEGER_INDUC_COND_REDUCTION)
4304 /* Initialize the reduction phi to zero. This prevents non-zero
4305 initial values from interfering with the reduction op. */
4306 gcc_assert (ncopies == 1);
4307 gcc_assert (i == 0);
4309 tree vec_init_def_type = TREE_TYPE (vec_init_def);
4310 tree zero_vec = build_zero_cst (vec_init_def_type);
4312 add_phi_arg (as_a <gphi *> (phi), zero_vec,
4313 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4315 else
4316 add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4317 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4319 /* Set the loop-latch arg for the reduction-phi. */
4320 if (j > 0)
4321 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4323 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4324 UNKNOWN_LOCATION);
4326 if (dump_enabled_p ())
4328 dump_printf_loc (MSG_NOTE, vect_location,
4329 "transform reduction: created def-use cycle: ");
4330 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4331 dump_printf (MSG_NOTE, "\n");
4332 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4333 dump_printf (MSG_NOTE, "\n");
4336 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4340 /* 2. Create epilog code.
4341 The reduction epilog code operates across the elements of the vector
4342 of partial results computed by the vectorized loop.
4343 The reduction epilog code consists of:
4345 step 1: compute the scalar result in a vector (v_out2)
4346 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4347 step 3: adjust the scalar result (s_out3) if needed.
4349 Step 1 can be accomplished using one of the following three schemes:
4350 (scheme 1) using reduc_code, if available.
4351 (scheme 2) using whole-vector shifts, if available.
4352 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4353 combined.
4355 The overall epilog code looks like this:
4357 s_out0 = phi <s_loop> # original EXIT_PHI
4358 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4359 v_out2 = reduce <v_out1> # step 1
4360 s_out3 = extract_field <v_out2, 0> # step 2
4361 s_out4 = adjust_result <s_out3> # step 3
4363 (step 3 is optional, and steps 1 and 2 may be combined).
4364 Lastly, the uses of s_out0 are replaced by s_out4. */
4367 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4368 v_out1 = phi <VECT_DEF>
4369 Store them in NEW_PHIS. */
4371 exit_bb = single_exit (loop)->dest;
4372 prev_phi_info = NULL;
4373 new_phis.create (vect_defs.length ());
4374 FOR_EACH_VEC_ELT (vect_defs, i, def)
4376 for (j = 0; j < ncopies; j++)
4378 tree new_def = copy_ssa_name (def);
4379 phi = create_phi_node (new_def, exit_bb);
4380 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4381 if (j == 0)
4382 new_phis.quick_push (phi);
4383 else
4385 def = vect_get_vec_def_for_stmt_copy (dt, def);
4386 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4389 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4390 prev_phi_info = vinfo_for_stmt (phi);
4394 /* The epilogue is created for the outer-loop, i.e., for the loop being
4395 vectorized. Create exit phis for the outer loop. */
4396 if (double_reduc)
4398 loop = outer_loop;
4399 exit_bb = single_exit (loop)->dest;
4400 inner_phis.create (vect_defs.length ());
4401 FOR_EACH_VEC_ELT (new_phis, i, phi)
4403 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4404 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4405 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4406 PHI_RESULT (phi));
4407 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4408 loop_vinfo));
4409 inner_phis.quick_push (phi);
4410 new_phis[i] = outer_phi;
4411 prev_phi_info = vinfo_for_stmt (outer_phi);
4412 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4414 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4415 new_result = copy_ssa_name (PHI_RESULT (phi));
4416 outer_phi = create_phi_node (new_result, exit_bb);
4417 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4418 PHI_RESULT (phi));
4419 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4420 loop_vinfo));
4421 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4422 prev_phi_info = vinfo_for_stmt (outer_phi);
4427 exit_gsi = gsi_after_labels (exit_bb);
4429 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4430 (i.e. when reduc_code is not available) and in the final adjustment
4431 code (if needed). Also get the original scalar reduction variable as
4432 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4433 represents a reduction pattern), the tree-code and scalar-def are
4434 taken from the original stmt that the pattern-stmt (STMT) replaces.
4435 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4436 are taken from STMT. */
4438 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4439 if (!orig_stmt)
4441 /* Regular reduction */
4442 orig_stmt = stmt;
4444 else
4446 /* Reduction pattern */
4447 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4448 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4449 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4452 code = gimple_assign_rhs_code (orig_stmt);
4453 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4454 partial results are added and not subtracted. */
4455 if (code == MINUS_EXPR)
4456 code = PLUS_EXPR;
4458 scalar_dest = gimple_assign_lhs (orig_stmt);
4459 scalar_type = TREE_TYPE (scalar_dest);
4460 scalar_results.create (group_size);
4461 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4462 bitsize = TYPE_SIZE (scalar_type);
4464 /* In case this is a reduction in an inner-loop while vectorizing an outer
4465 loop - we don't need to extract a single scalar result at the end of the
4466 inner-loop (unless it is a double reduction, i.e., the use of the reduction is
4467 outside the outer-loop). The final vector of partial results will be used
4468 in the vectorized outer-loop, or reduced to a scalar result at the end of
4469 the outer-loop. */
4470 if (nested_in_vect_loop && !double_reduc)
4471 goto vect_finalize_reduction;
4473 /* SLP reduction without reduction chain, e.g.,
4474 # a1 = phi <a2, a0>
4475 # b1 = phi <b2, b0>
4476 a2 = operation (a1)
4477 b2 = operation (b1) */
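/* E.g. (an illustrative source-level sketch), two independent accumulators
in one loop give rise to such an SLP reduction:
for (i = 0; i < n; i++)
{
suma += a[2*i];
sumb += a[2*i+1];
} */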
4478 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4480 /* In case of reduction chain, e.g.,
4481 # a1 = phi <a3, a0>
4482 a2 = operation (a1)
4483 a3 = operation (a2),
4485 we may end up with more than one vector result. Here we reduce them to
4486 one vector. */
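/* E.g. (an illustrative source-level sketch), a reduction chain arises when
the same accumulator is updated several times per iteration:
for (i = 0; i < n; i++)
{
s = s + a[2*i];
s = s + a[2*i+1];
} */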
4487 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4489 tree first_vect = PHI_RESULT (new_phis[0]);
4490 tree tmp;
4491 gassign *new_vec_stmt = NULL;
4493 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4494 for (k = 1; k < new_phis.length (); k++)
4496 gimple *next_phi = new_phis[k];
4497 tree second_vect = PHI_RESULT (next_phi);
4499 tmp = build2 (code, vectype, first_vect, second_vect);
4500 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4501 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4502 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4503 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4506 new_phi_result = first_vect;
4507 if (new_vec_stmt)
4509 new_phis.truncate (0);
4510 new_phis.safe_push (new_vec_stmt);
4513 else
4514 new_phi_result = PHI_RESULT (new_phis[0]);
4516 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4518 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4519 various data values where the condition matched and another vector
4520 (INDUCTION_INDEX) containing all the indexes of those matches. We
4521 need to extract the last matching index (which will be the index with
4522 the highest value) and use this to index into the data vector.
4523 For the case where there were no matches, the data vector will contain
4524 all default values and the index vector will be all zeros. */
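/* A scalar sketch of the extraction performed below (illustrative;
assuming a 4-element vector, DATA = NEW_PHI_RESULT, IDX = INDUCTION_INDEX):
max_index = max (idx[0], idx[1], idx[2], idx[3]);
cond[k] = (idx[k] == max_index) ? data[k] : 0; for k = 0..3
result = max (cond[0], cond[1], cond[2], cond[3]);
with the final max done on an unsigned view of the data. */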
4526 /* Get various versions of the type of the vector of indexes. */
4527 tree index_vec_type = TREE_TYPE (induction_index);
4528 gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4529 tree index_scalar_type = TREE_TYPE (index_vec_type);
4530 tree index_vec_cmp_type = build_same_sized_truth_vector_type
4531 (index_vec_type);
4533 /* Get an unsigned integer version of the type of the data vector. */
4534 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4535 tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4536 tree vectype_unsigned = build_vector_type
4537 (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4539 /* First we need to create a vector (ZERO_VEC) of zeros and another
4540 vector (MAX_INDEX_VEC) filled with the last matching index, which we
4541 can create using a MAX reduction and then expanding.
4542 In the case where the loop never made any matches, the max index will
4543 be zero. */
4545 /* Vector of {0, 0, 0,...}. */
4546 tree zero_vec = make_ssa_name (vectype);
4547 tree zero_vec_rhs = build_zero_cst (vectype);
4548 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4549 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4551 /* Find maximum value from the vector of found indexes. */
4552 tree max_index = make_ssa_name (index_scalar_type);
4553 gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4554 induction_index);
4555 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4557 /* Vector of {max_index, max_index, max_index,...}. */
4558 tree max_index_vec = make_ssa_name (index_vec_type);
4559 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4560 max_index);
4561 gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4562 max_index_vec_rhs);
4563 gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4565 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4566 with the vector (INDUCTION_INDEX) of found indexes, choosing values
4567 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4568 otherwise. Only one value should match, resulting in a vector
4569 (VEC_COND) with one data value and the rest zeros.
4570 In the case where the loop never made any matches, every index will
4571 match, resulting in a vector with all data values (which will all be
4572 the default value). */
4574 /* Compare the max index vector to the vector of found indexes to find
4575 the position of the max value. */
4576 tree vec_compare = make_ssa_name (index_vec_cmp_type);
4577 gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4578 induction_index,
4579 max_index_vec);
4580 gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4582 /* Use the compare to choose either values from the data vector or
4583 zero. */
4584 tree vec_cond = make_ssa_name (vectype);
4585 gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4586 vec_compare, new_phi_result,
4587 zero_vec);
4588 gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4590 /* Finally we need to extract the data value from the vector (VEC_COND)
4591 into a scalar (MATCHED_DATA_REDUC). Logically we want to do an OR
4592 reduction, but because this doesn't exist, we can use a MAX reduction
4593 instead. The data value might be signed or a float so we need to cast
4594 it first.
4595 In the case where the loop never made any matches, the data values are
4596 all identical, and so will reduce down correctly. */
4598 /* Make the matched data values unsigned. */
4599 tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4600 tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4601 vec_cond);
4602 gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4603 VIEW_CONVERT_EXPR,
4604 vec_cond_cast_rhs);
4605 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4607 /* Reduce down to a scalar value. */
4608 tree data_reduc = make_ssa_name (scalar_type_unsigned);
4609 optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4610 optab_default);
4611 gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4612 != CODE_FOR_nothing);
4613 gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4614 REDUC_MAX_EXPR,
4615 vec_cond_cast);
4616 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4618 /* Convert the reduced value back to the result type and set as the
4619 result. */
4620 tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4621 data_reduc);
4622 epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4623 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4624 gimple_assign_set_lhs (epilog_stmt, new_temp);
4625 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4626 scalar_results.safe_push (new_temp);
4629 /* 2.3 Create the reduction code, using one of the three schemes described
4630 above. In SLP we simply need to extract all the elements from the
4631 vector (without reducing them), so we use scalar shifts. */
4632 else if (reduc_code != ERROR_MARK && !slp_reduc)
4634 tree tmp;
4635 tree vec_elem_type;
4637 /*** Case 1: Create:
4638 v_out2 = reduc_expr <v_out1> */
4640 if (dump_enabled_p ())
4641 dump_printf_loc (MSG_NOTE, vect_location,
4642 "Reduce using direct vector reduction.\n");
4644 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4645 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4647 tree tmp_dest =
4648 vect_create_destination_var (scalar_dest, vec_elem_type);
4649 tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4650 epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4651 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4652 gimple_assign_set_lhs (epilog_stmt, new_temp);
4653 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4655 tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4657 else
4658 tmp = build1 (reduc_code, scalar_type, new_phi_result);
4660 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4661 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4662 gimple_assign_set_lhs (epilog_stmt, new_temp);
4663 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4665 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4666 == INTEGER_INDUC_COND_REDUCTION)
4668 /* Earlier we set the initial value to be zero. Check the result
4669 and if it is zero then replace it with the original initial
4670 value. */
4671 tree zero = build_zero_cst (scalar_type);
4672 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4674 tmp = make_ssa_name (new_scalar_dest);
4675 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4676 initial_def, new_temp);
4677 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4678 new_temp = tmp;
4681 scalar_results.safe_push (new_temp);
4683 else
4685 bool reduce_with_shift = have_whole_vector_shift (mode);
4686 int element_bitsize = tree_to_uhwi (bitsize);
4687 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4688 tree vec_temp;
4690 /* Regardless of whether we have a whole vector shift, if we're
4691 emulating the operation via tree-vect-generic, we don't want
4692 to use it. Only the first round of the reduction is likely
4693 to still be profitable via emulation. */
4694 /* ??? It might be better to emit a reduction tree code here, so that
4695 tree-vect-generic can expand the first round via bit tricks. */
4696 if (!VECTOR_MODE_P (mode))
4697 reduce_with_shift = false;
4698 else
4700 optab optab = optab_for_tree_code (code, vectype, optab_default);
4701 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4702 reduce_with_shift = false;
4705 if (reduce_with_shift && !slp_reduc)
4707 int nelements = vec_size_in_bits / element_bitsize;
4708 unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4710 int elt_offset;
4712 tree zero_vec = build_zero_cst (vectype);
4713 /*** Case 2: Create:
4714 for (offset = nelements/2; offset >= 1; offset/=2)
4716 Create: va' = vec_shift <va, offset>
4717 Create: va = vop <va, va'>
4718 } */
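/* Concretely (an illustrative sketch for a 4-element sum {a0,a1,a2,a3}):
offset 2: va' = {a2, a3, 0, 0}, va = {a0+a2, a1+a3, ., .}
offset 1: va' = {a1+a3, ., ., 0}, va = {a0+a1+a2+a3, ., ., .}
and element 0 of VA then holds the scalar result extracted below. */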
4720 tree rhs;
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_NOTE, vect_location,
4724 "Reduce using vector shifts\n");
4726 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4727 new_temp = new_phi_result;
4728 for (elt_offset = nelements / 2;
4729 elt_offset >= 1;
4730 elt_offset /= 2)
4732 calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4733 tree mask = vect_gen_perm_mask_any (vectype, sel);
4734 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4735 new_temp, zero_vec, mask);
4736 new_name = make_ssa_name (vec_dest, epilog_stmt);
4737 gimple_assign_set_lhs (epilog_stmt, new_name);
4738 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4740 epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4741 new_temp);
4742 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4743 gimple_assign_set_lhs (epilog_stmt, new_temp);
4744 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4747 /* 2.4 Extract the final scalar result. Create:
4748 s_out3 = extract_field <v_out2, bitpos> */
4750 if (dump_enabled_p ())
4751 dump_printf_loc (MSG_NOTE, vect_location,
4752 "extract scalar result\n");
4754 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4755 bitsize, bitsize_zero_node);
4756 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4757 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4758 gimple_assign_set_lhs (epilog_stmt, new_temp);
4759 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4760 scalar_results.safe_push (new_temp);
4762 else
4764 /*** Case 3: Create:
4765 s = extract_field <v_out2, 0>
4766 for (offset = element_size;
4767 offset < vector_size;
4768 offset += element_size;)
4770 Create: s' = extract_field <v_out2, offset>
4771 Create: s = op <s, s'> // For non SLP cases
4772 } */
4774 if (dump_enabled_p ())
4775 dump_printf_loc (MSG_NOTE, vect_location,
4776 "Reduce using scalar code.\n");
4778 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4779 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4781 int bit_offset;
4782 if (gimple_code (new_phi) == GIMPLE_PHI)
4783 vec_temp = PHI_RESULT (new_phi);
4784 else
4785 vec_temp = gimple_assign_lhs (new_phi);
4786 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4787 bitsize_zero_node);
4788 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4789 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4790 gimple_assign_set_lhs (epilog_stmt, new_temp);
4791 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4793 /* In SLP we don't need to apply the reduction operation, so we just
4794 collect s' values in SCALAR_RESULTS. */
4795 if (slp_reduc)
4796 scalar_results.safe_push (new_temp);
4798 for (bit_offset = element_bitsize;
4799 bit_offset < vec_size_in_bits;
4800 bit_offset += element_bitsize)
4802 tree bitpos = bitsize_int (bit_offset);
4803 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4804 bitsize, bitpos);
4806 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4807 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4808 gimple_assign_set_lhs (epilog_stmt, new_name);
4809 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4811 if (slp_reduc)
4813 /* In SLP we don't need to apply the reduction operation, so
4814 we just collect s' values in SCALAR_RESULTS. */
4815 new_temp = new_name;
4816 scalar_results.safe_push (new_name);
4818 else
4820 epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4821 new_name, new_temp);
4822 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4823 gimple_assign_set_lhs (epilog_stmt, new_temp);
4824 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4829 /* The only case where we need to reduce scalar results in SLP is
4830 unrolling. If the size of SCALAR_RESULTS is greater than
4831 GROUP_SIZE, we reduce them by combining elements modulo
4832 GROUP_SIZE. */
4833 if (slp_reduc)
4835 tree res, first_res, new_res;
4836 gimple *new_stmt;
4838 /* Reduce multiple scalar results in case of SLP unrolling. */
4839 for (j = group_size; scalar_results.iterate (j, &res);
4840 j++)
4842 first_res = scalar_results[j % group_size];
4843 new_stmt = gimple_build_assign (new_scalar_dest, code,
4844 first_res, res);
4845 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4846 gimple_assign_set_lhs (new_stmt, new_res);
4847 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4848 scalar_results[j % group_size] = new_res;
4851 else
4852 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4853 scalar_results.safe_push (new_temp);
4857 vect_finalize_reduction:
4859 if (double_reduc)
4860 loop = loop->inner;
4862 /* 2.5 Adjust the final result by the initial value of the reduction
4863 variable. (When such adjustment is not needed, then
4864 'adjustment_def' is zero). For example, if code is PLUS we create:
4865 new_temp = loop_exit_def + adjustment_def */
4867 if (adjustment_def)
4869 gcc_assert (!slp_reduc);
4870 if (nested_in_vect_loop)
4872 new_phi = new_phis[0];
4873 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4874 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4875 new_dest = vect_create_destination_var (scalar_dest, vectype);
4877 else
4879 new_temp = scalar_results[0];
4880 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4881 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4882 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4885 epilog_stmt = gimple_build_assign (new_dest, expr);
4886 new_temp = make_ssa_name (new_dest, epilog_stmt);
4887 gimple_assign_set_lhs (epilog_stmt, new_temp);
4888 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4889 if (nested_in_vect_loop)
4891 set_vinfo_for_stmt (epilog_stmt,
4892 new_stmt_vec_info (epilog_stmt, loop_vinfo));
4893 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4894 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4896 if (!double_reduc)
4897 scalar_results.quick_push (new_temp);
4898 else
4899 scalar_results[0] = new_temp;
4901 else
4902 scalar_results[0] = new_temp;
4904 new_phis[0] = epilog_stmt;
4907 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4908 phis with new adjusted scalar results, i.e., replace use <s_out0>
4909 with use <s_out4>.
4911 Transform:
4912 loop_exit:
4913 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4914 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4915 v_out2 = reduce <v_out1>
4916 s_out3 = extract_field <v_out2, 0>
4917 s_out4 = adjust_result <s_out3>
4918 use <s_out0>
4919 use <s_out0>
4921 into:
4923 loop_exit:
4924 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4925 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4926 v_out2 = reduce <v_out1>
4927 s_out3 = extract_field <v_out2, 0>
4928 s_out4 = adjust_result <s_out3>
4929 use <s_out4>
4930 use <s_out4> */
4933 /* In an SLP reduction chain we reduce the vector results into one vector
4934 if necessary; hence we set GROUP_SIZE to 1 here. SCALAR_DEST is the LHS of
4935 the last stmt in the reduction chain, since we are looking for the loop
4936 exit phi node. */
4937 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4939 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4940 /* Handle reduction patterns. */
4941 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4942 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4944 scalar_dest = gimple_assign_lhs (dest_stmt);
4945 group_size = 1;
4948 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4949 case GROUP_SIZE is greater than the vectorization factor). Therefore, we
4950 need to match SCALAR_RESULTS with corresponding statements. The first
4951 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4952 the first vector stmt, etc.
4953 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
4954 if (group_size > new_phis.length ())
4956 ratio = group_size / new_phis.length ();
4957 gcc_assert (!(group_size % new_phis.length ()));
4959 else
4960 ratio = 1;
4962 for (k = 0; k < group_size; k++)
4964 if (k % ratio == 0)
4966 epilog_stmt = new_phis[k / ratio];
4967 reduction_phi = reduction_phis[k / ratio];
4968 if (double_reduc)
4969 inner_phi = inner_phis[k / ratio];
4972 if (slp_reduc)
4974 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4976 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4977 /* SLP statements can't participate in patterns. */
4978 gcc_assert (!orig_stmt);
4979 scalar_dest = gimple_assign_lhs (current_stmt);
4982 phis.create (3);
4983 /* Find the loop-closed-use at the loop exit of the original scalar
4984 result. (The reduction result is expected to have two immediate uses -
4985 one at the latch block, and one at the loop exit). */
4986 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4987 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4988 && !is_gimple_debug (USE_STMT (use_p)))
4989 phis.safe_push (USE_STMT (use_p));
4991 /* While we expect to have found an exit_phi because of loop-closed-ssa
4992 form we can end up without one if the scalar cycle is dead. */
4994 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4996 if (outer_loop)
4998 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4999 gphi *vect_phi;
5001 /* FORNOW. Currently not supporting the case that an inner-loop
5002 reduction is not used in the outer-loop (but only outside the
5003 outer-loop), unless it is a double reduction. */
5004 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5005 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5006 || double_reduc);
5008 if (double_reduc)
5009 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5010 else
5011 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5012 if (!double_reduc
5013 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5014 != vect_double_reduction_def)
5015 continue;
5017 /* Handle double reduction:
5019 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
5020 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5021 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
5022 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
5024 At that point the regular reduction (stmt2 and stmt3) is
5025 already vectorized, as well as the exit phi node, stmt4.
5026 Here we vectorize the phi node of double reduction, stmt1, and
5027 update all relevant statements. */
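/* A typical source form of such a double reduction (an illustrative
sketch) is a sum accumulated over a loop nest:
for (j = 0; j < m; j++) <-- s1/s2 cycle (outer loop)
for (i = 0; i < n; i++) <-- s3/s4 cycle (inner loop)
s += a[j][i]; */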
5029 /* Go through all the uses of s2 to find double reduction phi
5030 node, i.e., stmt1 above. */
5031 orig_name = PHI_RESULT (exit_phi);
5032 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5034 stmt_vec_info use_stmt_vinfo;
5035 stmt_vec_info new_phi_vinfo;
5036 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5037 basic_block bb = gimple_bb (use_stmt);
5038 gimple *use;
5040 /* Check that USE_STMT is really a double reduction phi
5041 node. */
5042 if (gimple_code (use_stmt) != GIMPLE_PHI
5043 || gimple_phi_num_args (use_stmt) != 2
5044 || bb->loop_father != outer_loop)
5045 continue;
5046 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5047 if (!use_stmt_vinfo
5048 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5049 != vect_double_reduction_def)
5050 continue;
5052 /* Create vector phi node for double reduction:
5053 vs1 = phi <vs0, vs2>
5054 vs1 was created previously in this function by a call to
5055 vect_get_vec_def_for_operand and is stored in
5056 vec_initial_def;
5057 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5058 vs0 is created here. */
5060 /* Create vector phi node. */
5061 vect_phi = create_phi_node (vec_initial_def, bb);
5062 new_phi_vinfo = new_stmt_vec_info (vect_phi,
5063 loop_vec_info_for_loop (outer_loop));
5064 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5066 /* Create vs0 - initial def of the double reduction phi. */
5067 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5068 loop_preheader_edge (outer_loop));
5069 init_def = get_initial_def_for_reduction (stmt,
5070 preheader_arg, NULL);
5071 vect_phi_init = vect_init_vector (use_stmt, init_def,
5072 vectype, NULL);
5074 /* Update phi node arguments with vs0 and vs2. */
5075 add_phi_arg (vect_phi, vect_phi_init,
5076 loop_preheader_edge (outer_loop),
5077 UNKNOWN_LOCATION);
5078 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5079 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5080 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_NOTE, vect_location,
5083 "created double reduction phi node: ");
5084 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5085 dump_printf (MSG_NOTE, "\n");
5088 vect_phi_res = PHI_RESULT (vect_phi);
5090 /* Replace the use, i.e., set the correct vs1 in the regular
5091 reduction phi node. FORNOW, NCOPIES is always 1, so the
5092 loop is redundant. */
5093 use = reduction_phi;
5094 for (j = 0; j < ncopies; j++)
5096 edge pr_edge = loop_preheader_edge (loop);
5097 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5098 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5104 phis.release ();
5105 if (nested_in_vect_loop)
5107 if (double_reduc)
5108 loop = outer_loop;
5109 else
5110 continue;
5113 phis.create (3);
5114 /* Find the loop-closed-use at the loop exit of the original scalar
5115 result. (The reduction result is expected to have two immediate uses,
5116 one at the latch block, and one at the loop exit). For double
5117 reductions we are looking for exit phis of the outer loop. */
5118 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5120 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5122 if (!is_gimple_debug (USE_STMT (use_p)))
5123 phis.safe_push (USE_STMT (use_p));
5125 else
5127 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5129 tree phi_res = PHI_RESULT (USE_STMT (use_p));
5131 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5133 if (!flow_bb_inside_loop_p (loop,
5134 gimple_bb (USE_STMT (phi_use_p)))
5135 && !is_gimple_debug (USE_STMT (phi_use_p)))
5136 phis.safe_push (USE_STMT (phi_use_p));
5142 FOR_EACH_VEC_ELT (phis, i, exit_phi)
5144 /* Replace the uses: */
5145 orig_name = PHI_RESULT (exit_phi);
5146 scalar_result = scalar_results[k];
5147 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5148 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5149 SET_USE (use_p, scalar_result);
5152 phis.release ();
5157 /* Function is_nonwrapping_integer_induction.
5159 Check whether STMT (which is part of loop LOOP) is an induction that both
5160 increments and does not overflow. */
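/* In other words (a sketch of the check below): for base B, step S and
at most NI latch executions, require that B + S * NI still fits in the
precision of the phi result type (or that overflow in that type is
undefined), so the induction cannot wrap. */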
5162 static bool
5163 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5165 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5166 tree base = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
5167 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5168 tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5169 widest_int ni, max_loop_value, lhs_max;
5170 bool overflow = false;
5172 /* Make sure the loop is integer based. */
5173 if (TREE_CODE (base) != INTEGER_CST
5174 || TREE_CODE (step) != INTEGER_CST)
5175 return false;
5177 /* Check that the induction increments. */
5178 if (tree_int_cst_sgn (step) == -1)
5179 return false;
5181 /* Check that the max size of the loop will not wrap. */
5183 if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5184 return true;
5186 if (! max_stmt_executions (loop, &ni))
5187 return false;
5189 max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5190 &overflow);
5191 if (overflow)
5192 return false;
5194 max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5195 TYPE_SIGN (lhs_type), &overflow);
5196 if (overflow)
5197 return false;
5199 return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5200 <= TYPE_PRECISION (lhs_type));
5203 /* Function vectorizable_reduction.
5205 Check if STMT performs a reduction operation that can be vectorized.
5206 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5207 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5208 Return FALSE if not a vectorizable STMT, TRUE otherwise.
5210 This function also handles reduction idioms (patterns) that have been
5211 recognized in advance during vect_pattern_recog. In this case, STMT may be
5212 of this form:
5213 X = pattern_expr (arg0, arg1, ..., X)
5214 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5215 sequence that had been detected and replaced by the pattern-stmt (STMT).
5217 This function also handles reduction of condition expressions, for example:
5218 for (int i = 0; i < N; i++)
5219 if (a[i] < value)
5220 last = a[i];
5221 This is handled by vectorizing the loop and creating an additional vector
5222 containing the loop indexes for which "a[i] < value" was true. In the
5223 function epilogue this is reduced to a single max value and then used to
5224 index into the vector of results.
5226 In some cases of reduction patterns, the type of the reduction variable X is
5227 different than the type of the other arguments of STMT.
5228 In such cases, the vectype that is used when transforming STMT into a vector
5229 stmt is different than the vectype that is used to determine the
5230 vectorization factor, because it consists of a different number of elements
5231 than the actual number of elements that are being operated upon in parallel.
5233 For example, consider an accumulation of shorts into an int accumulator.
5234 On some targets it's possible to vectorize this pattern operating on 8
5235 shorts at a time (hence, the vectype for purposes of determining the
5236 vectorization factor should be V8HI); on the other hand, the vectype that
5237 is used to create the vector form is actually V4SI (the type of the result).
5239 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5240 indicates the actual level of parallelism (V8HI in the example), so that
5241 the right vectorization factor is derived. This vectype
5242 corresponds to the type of arguments to the reduction stmt, and should *NOT*
5243 be used to create the vectorized stmt. The right vectype for the vectorized
5244 stmt is obtained from the type of the result X:
5245 get_vectype_for_scalar_type (TREE_TYPE (X))
5247 This means that, contrary to "regular" reductions (or "regular" stmts in
5248 general), the following equation:
5249 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5250 does *NOT* necessarily hold for reduction patterns. */
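/* A source-level sketch of such a pattern (illustrative; a widening
summation of shorts into an int accumulator):
short a[N]; int sum = 0;
for (i = 0; i < N; i++)
sum += a[i];
Here the arguments are operated on as V8HI while the result, and hence
the vectorized stmt, uses V4SI. */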
5252 bool
5253 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5254 gimple **vec_stmt, slp_tree slp_node)
5256 tree vec_dest;
5257 tree scalar_dest;
5258 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5259 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5260 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5261 tree vectype_in = NULL_TREE;
5262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5263 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5264 enum tree_code code, orig_code, epilog_reduc_code;
5265 machine_mode vec_mode;
5266 int op_type;
5267 optab optab, reduc_optab;
5268 tree new_temp = NULL_TREE;
5269 gimple *def_stmt;
5270 enum vect_def_type dt;
5271 gphi *new_phi = NULL;
5272 tree scalar_type;
5273 bool is_simple_use;
5274 gimple *orig_stmt;
5275 stmt_vec_info orig_stmt_info;
5276 tree expr = NULL_TREE;
5277 int i;
5278 int ncopies;
5279 int epilog_copies;
5280 stmt_vec_info prev_stmt_info, prev_phi_info;
5281 bool single_defuse_cycle = false;
5282 tree reduc_def = NULL_TREE;
5283 gimple *new_stmt = NULL;
5284 int j;
5285 tree ops[3];
5286 bool nested_cycle = false, found_nested_cycle_def = false;
5287 gimple *reduc_def_stmt = NULL;
5288 bool double_reduc = false, dummy;
5289 basic_block def_bb;
5290 struct loop * def_stmt_loop, *outer_loop = NULL;
5291 tree def_arg;
5292 gimple *def_arg_stmt;
5293 auto_vec<tree> vec_oprnds0;
5294 auto_vec<tree> vec_oprnds1;
5295 auto_vec<tree> vect_defs;
5296 auto_vec<gimple *> phis;
5297 int vec_num;
5298 tree def0, def1, tem, op0, op1 = NULL_TREE;
5299 bool first_p = true;
5300 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5301 bool cond_expr_is_nonwrapping_integer_induction = false;
5303 /* In case of a reduction chain we switch to the first stmt in the chain, but
5304 we don't update STMT_INFO, since only the last stmt is marked as a reduction
5305 and has reduction properties. */
5306 if (GROUP_FIRST_ELEMENT (stmt_info)
5307 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5309 stmt = GROUP_FIRST_ELEMENT (stmt_info);
5310 first_p = false;
5313 if (nested_in_vect_loop_p (loop, stmt))
5315 outer_loop = loop;
5316 loop = loop->inner;
5317 nested_cycle = true;
5320 /* 1. Is vectorizable reduction? */
5321 /* Not supportable if the reduction variable is used in the loop, unless
5322 it's a reduction chain. */
5323 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5324 && !GROUP_FIRST_ELEMENT (stmt_info))
5325 return false;
5327 /* Reductions that are not used even in an enclosing outer-loop
5328 are expected to be "live" (used out of the loop). */
5329 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5330 && !STMT_VINFO_LIVE_P (stmt_info))
5331 return false;
5333 /* Make sure it was already recognized as a reduction computation. */
5334 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5335 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5336 return false;
5338 /* 2. Has this been recognized as a reduction pattern?
5340 Check if STMT represents a pattern that has been recognized
5341 in earlier analysis stages. For stmts that represent a pattern,
5342 the STMT_VINFO_RELATED_STMT field records the last stmt in
5343 the original sequence that constitutes the pattern. */
5345 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5346 if (orig_stmt)
5348 orig_stmt_info = vinfo_for_stmt (orig_stmt);
5349 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5350 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5353 /* 3. Check the operands of the operation. The first operands are defined
5354 inside the loop body. The last operand is the reduction variable,
5355 which is defined by the loop-header-phi. */
5357 gcc_assert (is_gimple_assign (stmt));
5359 /* Flatten RHS. */
5360 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5362 case GIMPLE_SINGLE_RHS:
5363 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5364 if (op_type == ternary_op)
5366 tree rhs = gimple_assign_rhs1 (stmt);
5367 ops[0] = TREE_OPERAND (rhs, 0);
5368 ops[1] = TREE_OPERAND (rhs, 1);
5369 ops[2] = TREE_OPERAND (rhs, 2);
5370 code = TREE_CODE (rhs);
5372 else
5373 return false;
5374 break;
5376 case GIMPLE_BINARY_RHS:
5377 code = gimple_assign_rhs_code (stmt);
5378 op_type = TREE_CODE_LENGTH (code);
5379 gcc_assert (op_type == binary_op);
5380 ops[0] = gimple_assign_rhs1 (stmt);
5381 ops[1] = gimple_assign_rhs2 (stmt);
5382 break;
5384 case GIMPLE_TERNARY_RHS:
5385 code = gimple_assign_rhs_code (stmt);
5386 op_type = TREE_CODE_LENGTH (code);
5387 gcc_assert (op_type == ternary_op);
5388 ops[0] = gimple_assign_rhs1 (stmt);
5389 ops[1] = gimple_assign_rhs2 (stmt);
5390 ops[2] = gimple_assign_rhs3 (stmt);
5391 break;
5393 case GIMPLE_UNARY_RHS:
5394 return false;
5396 default:
5397 gcc_unreachable ();
5399 /* The default is that the reduction variable is the last operand in the statement. */
5400 int reduc_index = op_type - 1;
5402 if (code == COND_EXPR && slp_node)
5403 return false;
5405 scalar_dest = gimple_assign_lhs (stmt);
5406 scalar_type = TREE_TYPE (scalar_dest);
5407 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5408 && !SCALAR_FLOAT_TYPE_P (scalar_type))
5409 return false;
5411 /* Do not try to vectorize bit-precision reductions. */
5412 if ((TYPE_PRECISION (scalar_type)
5413 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5414 return false;
5416 /* All uses but the last are expected to be defined in the loop.
5417 The last use is the reduction variable. In case of nested cycle this
5418 assumption is not true: we use reduc_index to record the index of the
5419 reduction variable. */
5420 for (i = 0; i < op_type - 1; i++)
5422 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5423 if (i == 0 && code == COND_EXPR)
5424 continue;
5426 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5427 &def_stmt, &dt, &tem);
5428 if (!vectype_in)
5429 vectype_in = tem;
5430 gcc_assert (is_simple_use);
5432 if (dt != vect_internal_def
5433 && dt != vect_external_def
5434 && dt != vect_constant_def
5435 && dt != vect_induction_def
5436 && !(dt == vect_nested_cycle && nested_cycle))
5437 return false;
5439 if (dt == vect_nested_cycle)
5441 found_nested_cycle_def = true;
5442 reduc_def_stmt = def_stmt;
5443 reduc_index = i;
5446 if (i == 1 && code == COND_EXPR && dt == vect_induction_def
5447 && is_nonwrapping_integer_induction (def_stmt, loop))
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE, vect_location,
5451 "condition expression based on integer "
5452 "induction.\n");
5453 cond_expr_is_nonwrapping_integer_induction = true;
5457 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
5458 if (!vectype_in)
5459 vectype_in = tem;
5460 gcc_assert (is_simple_use);
5461 if (!found_nested_cycle_def)
5462 reduc_def_stmt = def_stmt;
5464 if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5465 return false;
5467 if (!(dt == vect_reduction_def
5468 || dt == vect_nested_cycle
5469 || ((dt == vect_internal_def || dt == vect_external_def
5470 || dt == vect_constant_def || dt == vect_induction_def)
5471 && nested_cycle && found_nested_cycle_def)))
5473 /* For pattern recognized stmts, orig_stmt might be a reduction,
5474 but some helper statements for the pattern might not, or
5475 might be COND_EXPRs with reduction uses in the condition. */
5476 gcc_assert (orig_stmt);
5477 return false;
5480 gimple *tmp = vect_is_simple_reduction
5481 (loop_vinfo, reduc_def_stmt,
5482 !nested_cycle, &dummy, false,
5483 &STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
5485 if (cond_expr_is_nonwrapping_integer_induction
5486 && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5487 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
5489 if (orig_stmt)
5490 gcc_assert (tmp == orig_stmt
5491 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5492 else
5493 /* We changed STMT to be the first stmt in reduction chain, hence we
5494 check that in this case the first element in the chain is STMT. */
5495 gcc_assert (stmt == tmp
5496 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5498 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5499 return false;
5501 if (slp_node || PURE_SLP_STMT (stmt_info))
5502 ncopies = 1;
5503 else
5504 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5505 / TYPE_VECTOR_SUBPARTS (vectype_in));
5507 gcc_assert (ncopies >= 1);
5509 vec_mode = TYPE_MODE (vectype_in);
5511 if (code == COND_EXPR)
5513 /* Only call during the analysis stage, otherwise we'll lose
5514 STMT_VINFO_TYPE. */
5515 if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5516 ops[reduc_index], 0, NULL))
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "unsupported condition in reduction\n");
5521 return false;
5524 else
5526 /* 4. Supportable by target? */
5528 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5529 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5531 /* Shifts and rotates are only supported by vectorizable_shift,
5532 not vectorizable_reduction. */
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5535 "unsupported shift or rotation.\n");
5536 return false;
5539 /* 4.1. check support for the operation in the loop */
5540 optab = optab_for_tree_code (code, vectype_in, optab_default);
5541 if (!optab)
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5545 "no optab.\n");
5547 return false;
5550 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5552 if (dump_enabled_p ())
5553 dump_printf (MSG_NOTE, "op not supported by target.\n");
5555 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5556 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5557 < vect_min_worthwhile_factor (code))
5558 return false;
5560 if (dump_enabled_p ())
5561 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5564 /* Worthwhile without SIMD support? */
5565 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5566 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5567 < vect_min_worthwhile_factor (code))
5569 if (dump_enabled_p ())
5570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5571 "not worthwhile without SIMD support.\n");
5573 return false;
5577 /* 4.2. Check support for the epilog operation.
5579 If STMT represents a reduction pattern, then the type of the
5580 reduction variable may be different than the type of the rest
5581 of the arguments. For example, consider the case of accumulation
5582 of shorts into an int accumulator; The original code:
5583 S1: int_a = (int) short_a;
5584 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5586 was replaced with:
5587 STMT: int_acc = widen_sum <short_a, int_acc>
5589 This means that:
5590 1. The tree-code that is used to create the vector operation in the
5591 epilog code (that reduces the partial results) is not the
5592 tree-code of STMT, but is rather the tree-code of the original
5593 stmt from the pattern that STMT is replacing. I.e., in the example
5594 above we want to use 'widen_sum' in the loop, but 'plus' in the
5595 epilog.
5596 2. The type (mode) we use to check available target support
5597 for the vector operation to be created in the *epilog*, is
5598 determined by the type of the reduction variable (in the example
5599 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
5600 However the type (mode) we use to check available target support
5601 for the vector operation to be created *inside the loop*, is
5602 determined by the type of the other arguments to STMT (in the
5603 example we'd check this: optab_handler (widen_sum_optab,
5604 vect_short_mode)).
5606 This is contrary to "regular" reductions, in which the types of all
5607 the arguments are the same as the type of the reduction variable.
5608 For "regular" reductions we can therefore use the same vector type
5609 (and also the same tree-code) when generating the epilog code and
5610 when generating the code inside the loop. */
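  /* For illustration, a rough sketch of how the two checks differ for the
     widen_sum example above (not the exact statements the vectorizer
     emits): inside the loop we generate something like

       vect_acc = WIDEN_SUM_EXPR <vect_short_a, vect_acc>;

     whose support is checked in the mode of the short operands, while the
     epilog reduces VECT_ACC with PLUS_EXPR, whose support is checked in
     the mode of the int accumulator.  */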
5612 if (orig_stmt)
5614 /* This is a reduction pattern: get the vectype from the type of the
5615 reduction variable, and get the tree-code from orig_stmt. */
5616 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5617 == TREE_CODE_REDUCTION);
5618 orig_code = gimple_assign_rhs_code (orig_stmt);
5619 gcc_assert (vectype_out);
5620 vec_mode = TYPE_MODE (vectype_out);
5622 else
5624 /* Regular reduction: the same vectype and tree-code as used for
5625 the vector code inside the loop can also be used for the epilog code. */
5626 orig_code = code;
5628 /* For simple condition reductions, replace with the actual expression
5629 we want to base our reduction around. */
5630 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5631 == INTEGER_INDUC_COND_REDUCTION)
5632 orig_code = MAX_EXPR;
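      /* For illustration (a sketch, assuming the usual case of an
         increasing induction): in a scalar loop such as

           for (i = 0; i < n; i++)
             if (a[i] < b[i])
               last = i;

         every match assigns a nonwrapping induction value, so the final
         value of LAST is the maximum of the matching values and the
         epilog can use MAX_EXPR instead of a generic condition
         reduction.  */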
5635 if (nested_cycle)
5637 def_bb = gimple_bb (reduc_def_stmt);
5638 def_stmt_loop = def_bb->loop_father;
5639 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5640 loop_preheader_edge (def_stmt_loop));
5641 if (TREE_CODE (def_arg) == SSA_NAME
5642 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5643 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5644 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5645 && vinfo_for_stmt (def_arg_stmt)
5646 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5647 == vect_double_reduction_def)
5648 double_reduc = true;
5651 epilog_reduc_code = ERROR_MARK;
5653 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
5654 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5655 == INTEGER_INDUC_COND_REDUCTION)
5657 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5659 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5660 optab_default);
5661 if (!reduc_optab)
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5665 "no optab for reduction.\n");
5667 epilog_reduc_code = ERROR_MARK;
5669 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5671 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5672 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5676 "reduc op not supported by target.\n");
5678 epilog_reduc_code = ERROR_MARK;
5682 /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5683 generated in the epilog using multiple expressions. This does not
5684 work for condition reductions. */
5685 if (epilog_reduc_code == ERROR_MARK
5686 && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5687 == INTEGER_INDUC_COND_REDUCTION)
5689 if (dump_enabled_p ())
5690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5691 "no reduc code for scalar code.\n");
5692 return false;
5695 else
5697 if (!nested_cycle || double_reduc)
5699 if (dump_enabled_p ())
5700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5701 "no reduc code for scalar code.\n");
5703 return false;
5707 else
5709 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5710 cr_index_scalar_type = make_unsigned_type (scalar_precision);
5711 cr_index_vector_type = build_vector_type
5712 (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5714 epilog_reduc_code = REDUC_MAX_EXPR;
5715 optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5716 optab_default);
5717 if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5718 == CODE_FOR_nothing)
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5722 "reduc max op not supported by target.\n");
5723 return false;
5727 if ((double_reduc
5728 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
5729 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5730 == INTEGER_INDUC_COND_REDUCTION)
5731 && ncopies > 1)
5733 if (dump_enabled_p ())
5734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5735 "multiple types in double reduction or condition "
5736 "reduction.\n");
5737 return false;
5740 /* In case of widening multiplication by a constant, we update the type
5741 of the constant to be the type of the other operand. We check that the
5742 constant fits the type in the pattern recognition pass. */
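  /* For example (an illustrative case): for a dot-product pattern built
     from something like  sum += (int) b[i] * 4  with a short array B,
     one operand is the INTEGER_CST 4 and is converted here to the short
     type of the other operand.  */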
5743 if (code == DOT_PROD_EXPR
5744 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5746 if (TREE_CODE (ops[0]) == INTEGER_CST)
5747 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5748 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5749 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5750 else
5752 if (dump_enabled_p ())
5753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5754 "invalid types in dot-prod\n");
5756 return false;
5760 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5762 widest_int ni;
5764 if (! max_loop_iterations (loop, &ni))
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_NOTE, vect_location,
5768 "loop count not known, cannot create cond "
5769 "reduction.\n");
5770 return false;
5772 /* Convert backedges to iterations. */
5773 ni += 1;
5775 /* The additional index will be the same type as the condition. Check
5776 that the loop iteration count can fit into this type less one (the
5777 zero slot is reserved for when there are no matches). */
5778 tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
5779 if (wi::geu_p (ni, wi::to_widest (max_index)))
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_NOTE, vect_location,
5783 "loop size is greater than data size.\n");
5784 return false;
5788 if (!vec_stmt) /* transformation not required. */
5790 if (first_p
5791 && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5792 reduc_index))
5793 return false;
5794 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5795 return true;
5798 /** Transform. **/
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5803 /* FORNOW: Multiple types are not supported for condition. */
5804 if (code == COND_EXPR)
5805 gcc_assert (ncopies == 1);
5807 /* Create the destination vector */
5808 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5810 /* In case the vectorization factor (VF) is bigger than the number
5811 of elements that we can fit in a vectype (nunits), we have to generate
5812 more than one vector stmt - i.e., we need to "unroll" the
5813 vector stmt by a factor VF/nunits. For more details see documentation
5814 in vectorizable_operation. */
5816 /* If the reduction is used in an outer loop we need to generate
5817 VF intermediate results, like so (e.g. for ncopies=2):
5818 r0 = phi (init, r0)
5819 r1 = phi (init, r1)
5820 r0 = x0 + r0;
5821 r1 = x1 + r1;
5822 (i.e. we generate VF results in 2 registers).
5823 In this case we have a separate def-use cycle for each copy, and therefore
5824 for each copy we get the vector def for the reduction variable from the
5825 respective phi node created for this copy.
5827 Otherwise (the reduction is unused in the loop nest), we can combine
5828 together intermediate results, like so (e.g. for ncopies=2):
5829 r = phi (init, r)
5830 r = x0 + r;
5831 r = x1 + r;
5832 (i.e. we generate VF/2 results in a single register).
5833 In this case for each copy we get the vector def for the reduction variable
5834 from the vectorized reduction operation generated in the previous iteration.
5837 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5839 single_defuse_cycle = true;
5840 epilog_copies = 1;
5842 else
5843 epilog_copies = ncopies;
5845 prev_stmt_info = NULL;
5846 prev_phi_info = NULL;
5847 if (slp_node)
5848 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5849 else
5851 vec_num = 1;
5852 vec_oprnds0.create (1);
5853 if (op_type == ternary_op)
5854 vec_oprnds1.create (1);
5857 phis.create (vec_num);
5858 vect_defs.create (vec_num);
5859 if (!slp_node)
5860 vect_defs.quick_push (NULL_TREE);
5862 for (j = 0; j < ncopies; j++)
5864 if (j == 0 || !single_defuse_cycle)
5866 for (i = 0; i < vec_num; i++)
5868 /* Create the reduction-phi that defines the reduction
5869 operand. */
5870 new_phi = create_phi_node (vec_dest, loop->header);
5871 set_vinfo_for_stmt (new_phi,
5872 new_stmt_vec_info (new_phi, loop_vinfo));
5873 if (j == 0 || slp_node)
5874 phis.quick_push (new_phi);
5878 if (code == COND_EXPR)
5880 gcc_assert (!slp_node);
5881 vectorizable_condition (stmt, gsi, vec_stmt,
5882 PHI_RESULT (phis[0]),
5883 reduc_index, NULL);
5884 /* Multiple types are not supported for condition. */
5885 break;
5888 /* Handle uses. */
5889 if (j == 0)
5891 op0 = ops[!reduc_index];
5892 if (op_type == ternary_op)
5894 if (reduc_index == 0)
5895 op1 = ops[2];
5896 else
5897 op1 = ops[1];
5900 if (slp_node)
5901 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5902 slp_node, -1);
5903 else
5905 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5906 stmt);
5907 vec_oprnds0.quick_push (loop_vec_def0);
5908 if (op_type == ternary_op)
5910 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
5911 vec_oprnds1.quick_push (loop_vec_def1);
5915 else
5917 if (!slp_node)
5919 enum vect_def_type dt;
5920 gimple *dummy_stmt;
5922 vect_is_simple_use (ops[!reduc_index], loop_vinfo,
5923 &dummy_stmt, &dt);
5924 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5925 loop_vec_def0);
5926 vec_oprnds0[0] = loop_vec_def0;
5927 if (op_type == ternary_op)
5929 vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
5930 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5931 loop_vec_def1);
5932 vec_oprnds1[0] = loop_vec_def1;
5936 if (single_defuse_cycle)
5937 reduc_def = gimple_assign_lhs (new_stmt);
5939 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5942 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5944 if (slp_node)
5945 reduc_def = PHI_RESULT (phis[i]);
5946 else
5948 if (!single_defuse_cycle || j == 0)
5949 reduc_def = PHI_RESULT (new_phi);
5952 def1 = ((op_type == ternary_op)
5953 ? vec_oprnds1[i] : NULL);
5954 if (op_type == binary_op)
5956 if (reduc_index == 0)
5957 expr = build2 (code, vectype_out, reduc_def, def0);
5958 else
5959 expr = build2 (code, vectype_out, def0, reduc_def);
5961 else
5963 if (reduc_index == 0)
5964 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5965 else
5967 if (reduc_index == 1)
5968 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5969 else
5970 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5974 new_stmt = gimple_build_assign (vec_dest, expr);
5975 new_temp = make_ssa_name (vec_dest, new_stmt);
5976 gimple_assign_set_lhs (new_stmt, new_temp);
5977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5979 if (slp_node)
5981 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5982 vect_defs.quick_push (new_temp);
5984 else
5985 vect_defs[0] = new_temp;
5988 if (slp_node)
5989 continue;
5991 if (j == 0)
5992 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5993 else
5994 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5996 prev_stmt_info = vinfo_for_stmt (new_stmt);
5997 prev_phi_info = vinfo_for_stmt (new_phi);
6000 tree indx_before_incr, indx_after_incr, cond_name = NULL;
6002 /* Finalize the reduction-phi (set its arguments) and create the
6003 epilog reduction code. */
6004 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6006 new_temp = gimple_assign_lhs (*vec_stmt);
6007 vect_defs[0] = new_temp;
6009 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6010 which is updated with the current index of the loop for every match of
6011 the original loop's cond_expr (VEC_STMT). This results in a vector
6012 containing the last time the condition passed for that vector lane.
6013 The first match will be a 1 to allow 0 to be used for non-matching
6014 indexes. If there are no matches at all then the vector will be all
6015 zeroes. */
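  /* A worked example (illustrative values only): with 4 lanes and a
     condition that holds in scalar iterations 1, 6 and 7 (0-based), the
     induction vector takes the values {1,2,3,4} and then {5,6,7,8}, and
     the index vector evolves as

       start:                  {0, 0, 0, 0}
       after vector iter 0:    {0, 2, 0, 0}    (match in lane 1)
       after vector iter 1:    {0, 2, 7, 8}    (matches in lanes 2 and 3)

     so the epilog REDUC_MAX yields 8, i.e. the last match was scalar
     iteration 7 (indexes are biased by 1 so that 0 means "no match").  */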
6016 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6018 int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6019 int k;
6021 gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6023 /* First we create a simple vector induction variable which starts
6024 with the values {1,2,3,...} (SERIES_VECT) and increments by the
6025 vector size (STEP). */
6027 /* Create a {1,2,3,...} vector. */
6028 tree *vtemp = XALLOCAVEC (tree, nunits_out);
6029 for (k = 0; k < nunits_out; ++k)
6030 vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6031 tree series_vect = build_vector (cr_index_vector_type, vtemp);
6033 /* Create a vector of the step value. */
6034 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6035 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
6037 /* Create an induction variable. */
6038 gimple_stmt_iterator incr_gsi;
6039 bool insert_after;
6040 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6041 create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6042 insert_after, &indx_before_incr, &indx_after_incr);
6044 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6045 filled with zeros (VEC_ZERO). */
6047 /* Create a vector of 0s. */
6048 tree zero = build_zero_cst (cr_index_scalar_type);
6049 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6051 /* Create a vector phi node. */
6052 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6053 new_phi = create_phi_node (new_phi_tree, loop->header);
6054 set_vinfo_for_stmt (new_phi,
6055 new_stmt_vec_info (new_phi, loop_vinfo));
6056 add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6057 UNKNOWN_LOCATION);
6059 /* Now take the condition from the loop's original cond_expr
6060 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6061 every match uses values from the induction variable
6062 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6063 (NEW_PHI_TREE).
6064 Finally, we update the phi (NEW_PHI_TREE) to take the value of
6065 the new cond_expr (INDEX_COND_EXPR). */
6067 /* Turn the condition from vec_stmt into an ssa name. */
6068 gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
6069 tree ccompare = gimple_assign_rhs1 (*vec_stmt);
6070 tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
6071 gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
6072 ccompare);
6073 gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
6074 gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
6075 update_stmt (*vec_stmt);
6077 /* Create a conditional, where the condition is taken from vec_stmt
6078 (CCOMPARE_NAME), the "then" value is the induction index
6079 (INDEX_BEFORE_INCR) and the "else" value is the phi (NEW_PHI_TREE). */
6080 tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6081 ccompare_name, indx_before_incr,
6082 new_phi_tree);
6083 cond_name = make_ssa_name (cr_index_vector_type);
6084 gimple *index_condition = gimple_build_assign (cond_name,
6085 index_cond_expr);
6086 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6087 stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6088 loop_vinfo);
6089 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6090 set_vinfo_for_stmt (index_condition, index_vec_info);
6092 /* Update the phi with the vec cond. */
6093 add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6094 UNKNOWN_LOCATION);
6098 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6099 epilog_reduc_code, phis, reduc_index,
6100 double_reduc, slp_node, cond_name);
6102 return true;
6105 /* Function vect_min_worthwhile_factor.
6107 For a loop where we could vectorize the operation indicated by CODE,
6108 return the minimum vectorization factor that makes it worthwhile
6109 to use generic vectors. */
6111 vect_min_worthwhile_factor (enum tree_code code)
6113 switch (code)
6115 case PLUS_EXPR:
6116 case MINUS_EXPR:
6117 case NEGATE_EXPR:
6118 return 4;
6120 case BIT_AND_EXPR:
6121 case BIT_IOR_EXPR:
6122 case BIT_XOR_EXPR:
6123 case BIT_NOT_EXPR:
6124 return 2;
6126 default:
6127 return INT_MAX;
6132 /* Function vectorizable_induction
6134 Check if PHI performs an induction computation that can be vectorized.
6135 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6136 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6137 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
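/* For illustration (a sketch, assuming 4 lanes, an initial value I0 and a
   step S): the vectorized induction phi starts with the vector
   {I0, I0+S, I0+2*S, I0+3*S} and is advanced by the invariant vector
   {4*S, 4*S, 4*S, 4*S} on each iteration of the vectorized loop.  */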
6139 bool
6140 vectorizable_induction (gimple *phi,
6141 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6142 gimple **vec_stmt)
6144 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6145 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6146 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6148 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6149 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6150 tree vec_def;
6152 gcc_assert (ncopies >= 1);
6153 /* FORNOW. These restrictions should be relaxed. */
6154 if (nested_in_vect_loop_p (loop, phi))
6156 imm_use_iterator imm_iter;
6157 use_operand_p use_p;
6158 gimple *exit_phi;
6159 edge latch_e;
6160 tree loop_arg;
6162 if (ncopies > 1)
6164 if (dump_enabled_p ())
6165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6166 "multiple types in nested loop.\n");
6167 return false;
6170 exit_phi = NULL;
6171 latch_e = loop_latch_edge (loop->inner);
6172 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6173 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6175 gimple *use_stmt = USE_STMT (use_p);
6176 if (is_gimple_debug (use_stmt))
6177 continue;
6179 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6181 exit_phi = use_stmt;
6182 break;
6185 if (exit_phi)
6187 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
6188 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6189 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6193 "inner-loop induction only used outside "
6194 "of the outer vectorized loop.\n");
6195 return false;
6200 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6201 return false;
6203 /* FORNOW: SLP not supported. */
6204 if (STMT_SLP_TYPE (stmt_info))
6205 return false;
6207 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6209 if (gimple_code (phi) != GIMPLE_PHI)
6210 return false;
6212 if (!vec_stmt) /* transformation not required. */
6214 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6215 if (dump_enabled_p ())
6216 dump_printf_loc (MSG_NOTE, vect_location,
6217 "=== vectorizable_induction ===\n");
6218 vect_model_induction_cost (stmt_info, ncopies);
6219 return true;
6222 /** Transform. **/
6224 if (dump_enabled_p ())
6225 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6227 vec_def = get_initial_def_for_induction (phi);
6228 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6229 return true;
6232 /* Function vectorizable_live_operation.
6234 STMT computes a value that is used outside the loop. Check if
6235 it can be supported. */
6237 bool
6238 vectorizable_live_operation (gimple *stmt,
6239 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6240 gimple **vec_stmt)
6242 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6243 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6244 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6245 tree op;
6246 gimple *def_stmt;
6247 ssa_op_iter iter;
6249 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6251 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6252 return false;
6254 if (!is_gimple_assign (stmt))
6256 if (gimple_call_internal_p (stmt)
6257 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
6258 && gimple_call_lhs (stmt)
6259 && loop->simduid
6260 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
6261 && loop->simduid
6262 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
6264 edge e = single_exit (loop);
6265 basic_block merge_bb = e->dest;
6266 imm_use_iterator imm_iter;
6267 use_operand_p use_p;
6268 tree lhs = gimple_call_lhs (stmt);
6270 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
6272 gimple *use_stmt = USE_STMT (use_p);
6273 if (gimple_code (use_stmt) == GIMPLE_PHI
6274 && gimple_bb (use_stmt) == merge_bb)
6276 if (vec_stmt)
6278 tree vfm1
6279 = build_int_cst (unsigned_type_node,
6280 loop_vinfo->vectorization_factor - 1);
6281 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
6283 return true;
6288 return false;
6291 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6292 return false;
6294 /* FORNOW. CHECKME. */
6295 if (nested_in_vect_loop_p (loop, stmt))
6296 return false;
6298 /* FORNOW: support only if all uses are invariant. This means
6299 that the scalar operations can remain in place, unvectorized.
6300 The original last scalar value that they compute will be used. */
6301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
6303 enum vect_def_type dt = vect_uninitialized_def;
6305 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6309 "use not simple.\n");
6310 return false;
6313 if (dt != vect_external_def && dt != vect_constant_def)
6314 return false;
6317 /* No transformation is required for the cases we currently support. */
6318 return true;
6321 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
6323 static void
6324 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6326 ssa_op_iter op_iter;
6327 imm_use_iterator imm_iter;
6328 def_operand_p def_p;
6329 gimple *ustmt;
6331 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6333 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6335 basic_block bb;
6337 if (!is_gimple_debug (ustmt))
6338 continue;
6340 bb = gimple_bb (ustmt);
6342 if (!flow_bb_inside_loop_p (loop, bb))
6344 if (gimple_debug_bind_p (ustmt))
6346 if (dump_enabled_p ())
6347 dump_printf_loc (MSG_NOTE, vect_location,
6348 "killing debug use\n");
6350 gimple_debug_bind_reset_value (ustmt);
6351 update_stmt (ustmt);
6353 else
6354 gcc_unreachable ();
6361 /* This function builds ni_name = number of iterations. Statements
6362 are emitted on the loop preheader edge. */
6364 static tree
6365 vect_build_loop_niters (loop_vec_info loop_vinfo)
6367 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6368 if (TREE_CODE (ni) == INTEGER_CST)
6369 return ni;
6370 else
6372 tree ni_name, var;
6373 gimple_seq stmts = NULL;
6374 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6376 var = create_tmp_var (TREE_TYPE (ni), "niters");
6377 ni_name = force_gimple_operand (ni, &stmts, false, var);
6378 if (stmts)
6379 gsi_insert_seq_on_edge_immediate (pe, stmts);
6381 return ni_name;
6386 /* This function generates the following statements:
6388 ni_name = number of iterations loop executes
6389 ratio = ni_name / vf
6390 ratio_mult_vf_name = ratio * vf
6392 and places them on the loop preheader edge. */
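/* For example (illustrative values), with ni_name = 103 and vf = 8:
   ratio = 12, ratio_mult_vf_name = 96, and the remaining 7 iterations
   are left to the scalar epilogue loop.  */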
6394 static void
6395 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6396 tree ni_name,
6397 tree *ratio_mult_vf_name_ptr,
6398 tree *ratio_name_ptr)
6400 tree ni_minus_gap_name;
6401 tree var;
6402 tree ratio_name;
6403 tree ratio_mult_vf_name;
6404 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6405 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6406 tree log_vf;
6408 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
6410 /* If epilogue loop is required because of data accesses with gaps, we
6411 subtract one iteration from the total number of iterations here for
6412 correct calculation of RATIO. */
6413 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6415 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6416 ni_name,
6417 build_one_cst (TREE_TYPE (ni_name)));
6418 if (!is_gimple_val (ni_minus_gap_name))
6420 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
6421 gimple *stmts = NULL;
6422 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
6423 true, var);
6424 gsi_insert_seq_on_edge_immediate (pe, stmts);
6427 else
6428 ni_minus_gap_name = ni_name;
6430 /* Create: ratio = ni >> log2(vf) */
6431 /* ??? As we have ni == number of latch executions + 1, ni could
6432 have overflown to zero. So avoid computing ratio based on ni
6433 but compute it using the fact that we know ratio will be at least
6434 one, thus via (ni - vf) >> log2(vf) + 1. */
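  /* Checking the rewritten formula on illustrative values: with ni = 103
     and vf = 8, ((103 - 8) >> 3) + 1 = 11 + 1 = 12, the same as 103 >> 3.
     If NI has wrapped around to zero (2^N iterations in an N-bit type),
     the unsigned subtraction yields 2^N - vf and the result is
     (2^N / vf - 1) + 1 = 2^N / vf, which is still correct.  */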
6435 ratio_name
6436 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
6437 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
6438 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6439 ni_minus_gap_name,
6440 build_int_cst
6441 (TREE_TYPE (ni_name), vf)),
6442 log_vf),
6443 build_int_cst (TREE_TYPE (ni_name), 1));
6444 if (!is_gimple_val (ratio_name))
6446 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
6447 gimple *stmts = NULL;
6448 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
6449 gsi_insert_seq_on_edge_immediate (pe, stmts);
6451 *ratio_name_ptr = ratio_name;
6453 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6455 if (ratio_mult_vf_name_ptr)
6457 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6458 ratio_name, log_vf);
6459 if (!is_gimple_val (ratio_mult_vf_name))
6461 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
6462 gimple *stmts = NULL;
6463 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
6464 true, var);
6465 gsi_insert_seq_on_edge_immediate (pe, stmts);
6467 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6470 return;
6474 /* Function vect_transform_loop.
6476 The analysis phase has determined that the loop is vectorizable.
6477 Vectorize the loop - create vectorized stmts to replace the scalar
6478 stmts in the loop, and update the loop exit condition. */
6480 void
6481 vect_transform_loop (loop_vec_info loop_vinfo)
6483 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6484 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6485 int nbbs = loop->num_nodes;
6486 int i;
6487 tree ratio = NULL;
6488 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6489 bool grouped_store;
6490 bool slp_scheduled = false;
6491 gimple *stmt, *pattern_stmt;
6492 gimple_seq pattern_def_seq = NULL;
6493 gimple_stmt_iterator pattern_def_si = gsi_none ();
6494 bool transform_pattern_stmt = false;
6495 bool check_profitability = false;
6496 int th;
6497 /* Record number of iterations before we started tampering with the profile. */
6498 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6503 /* If the profile is imprecise, we have a chance to fix it up. */
6504 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6505 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
6507 /* Use the more conservative vectorization threshold. If the number
6508 of iterations is constant, assume the cost check has been performed
6509 by our caller. If the threshold makes all loops profitable that
6510 run at least the vectorization factor number of times, checking
6511 is pointless, too. */
6512 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6513 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6514 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6516 if (dump_enabled_p ())
6517 dump_printf_loc (MSG_NOTE, vect_location,
6518 "Profitability threshold is %d loop iterations.\n",
6519 th);
6520 check_profitability = true;
6523 /* Version the loop first, if required, so the profitability check
6524 comes first. */
6526 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
6527 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
6529 vect_loop_versioning (loop_vinfo, th, check_profitability);
6530 check_profitability = false;
6533 tree ni_name = vect_build_loop_niters (loop_vinfo);
6534 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
6536 /* Peel the loop if there are data refs with unknown alignment.
6537 Only one data ref with unknown store is allowed. */
6539 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
6541 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
6542 th, check_profitability);
6543 check_profitability = false;
6544 /* The above adjusts LOOP_VINFO_NITERS, so force ni_name to
6545 be re-computed. */
6546 ni_name = NULL_TREE;
6549 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6550 compile time constant), or it is a constant that is not a multiple of the
6551 vectorization factor, then an epilog loop needs to be created.
6552 We therefore duplicate the loop: the original loop will be vectorized,
6553 and will compute the first (n/VF) iterations. The second copy of the loop
6554 will remain scalar and will compute the remaining (n%VF) iterations.
6555 (VF is the vectorization factor). */
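  /* For example (illustrative values), with n = 1003 and VF = 4 the
     vectorized loop executes 250 iterations covering the first 1000
     scalar iterations, and the scalar epilogue loop executes the
     remaining 3.  */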
6557 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6558 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6560 tree ratio_mult_vf;
6561 if (!ni_name)
6562 ni_name = vect_build_loop_niters (loop_vinfo);
6563 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6564 &ratio);
6565 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6566 th, check_profitability);
6568 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6569 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6570 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6571 else
6573 if (!ni_name)
6574 ni_name = vect_build_loop_niters (loop_vinfo);
6575 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6578 /* 1) Make sure the loop header has exactly two entries
6579 2) Make sure we have a preheader basic block. */
6581 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6583 split_edge (loop_preheader_edge (loop));
6585 /* FORNOW: the vectorizer supports only loops whose body consists
6586 of one basic block (header + empty latch). When the vectorizer
6587 supports more involved loop forms, the order in which the BBs are
6588 traversed will need to be reconsidered. */
6590 for (i = 0; i < nbbs; i++)
6592 basic_block bb = bbs[i];
6593 stmt_vec_info stmt_info;
6595 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6596 gsi_next (&si))
6598 gphi *phi = si.phi ();
6599 if (dump_enabled_p ())
6601 dump_printf_loc (MSG_NOTE, vect_location,
6602 "------>vectorizing phi: ");
6603 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6604 dump_printf (MSG_NOTE, "\n");
6606 stmt_info = vinfo_for_stmt (phi);
6607 if (!stmt_info)
6608 continue;
6610 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6611 vect_loop_kill_debug_uses (loop, phi);
6613 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6614 && !STMT_VINFO_LIVE_P (stmt_info))
6615 continue;
6617 if (STMT_VINFO_VECTYPE (stmt_info)
6618 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6619 != (unsigned HOST_WIDE_INT) vectorization_factor)
6620 && dump_enabled_p ())
6621 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6623 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6625 if (dump_enabled_p ())
6626 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6627 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6631 pattern_stmt = NULL;
6632 for (gimple_stmt_iterator si = gsi_start_bb (bb);
6633 !gsi_end_p (si) || transform_pattern_stmt;)
6635 bool is_store;
6637 if (transform_pattern_stmt)
6638 stmt = pattern_stmt;
6639 else
6641 stmt = gsi_stmt (si);
6642 /* During vectorization remove existing clobber stmts. */
6643 if (gimple_clobber_p (stmt))
6645 unlink_stmt_vdef (stmt);
6646 gsi_remove (&si, true);
6647 release_defs (stmt);
6648 continue;
6652 if (dump_enabled_p ())
6654 dump_printf_loc (MSG_NOTE, vect_location,
6655 "------>vectorizing statement: ");
6656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6657 dump_printf (MSG_NOTE, "\n");
6660 stmt_info = vinfo_for_stmt (stmt);
6662 /* vector stmts created in the outer-loop during vectorization of
6663 stmts in an inner-loop may not have a stmt_info, and do not
6664 need to be vectorized. */
6665 if (!stmt_info)
6667 gsi_next (&si);
6668 continue;
6671 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6672 vect_loop_kill_debug_uses (loop, stmt);
6674 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6675 && !STMT_VINFO_LIVE_P (stmt_info))
6677 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6678 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6679 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6680 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6682 stmt = pattern_stmt;
6683 stmt_info = vinfo_for_stmt (stmt);
6685 else
6687 gsi_next (&si);
6688 continue;
6691 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6692 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6693 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6694 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6695 transform_pattern_stmt = true;
6697 /* If pattern statement has def stmts, vectorize them too. */
6698 if (is_pattern_stmt_p (stmt_info))
6700 if (pattern_def_seq == NULL)
6702 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6703 pattern_def_si = gsi_start (pattern_def_seq);
6705 else if (!gsi_end_p (pattern_def_si))
6706 gsi_next (&pattern_def_si);
6707 if (pattern_def_seq != NULL)
6709 gimple *pattern_def_stmt = NULL;
6710 stmt_vec_info pattern_def_stmt_info = NULL;
6712 while (!gsi_end_p (pattern_def_si))
6714 pattern_def_stmt = gsi_stmt (pattern_def_si);
6715 pattern_def_stmt_info
6716 = vinfo_for_stmt (pattern_def_stmt);
6717 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6718 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6719 break;
6720 gsi_next (&pattern_def_si);
6723 if (!gsi_end_p (pattern_def_si))
6725 if (dump_enabled_p ())
6727 dump_printf_loc (MSG_NOTE, vect_location,
6728 "==> vectorizing pattern def "
6729 "stmt: ");
6730 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6731 pattern_def_stmt, 0);
6732 dump_printf (MSG_NOTE, "\n");
6735 stmt = pattern_def_stmt;
6736 stmt_info = pattern_def_stmt_info;
6738 else
6740 pattern_def_si = gsi_none ();
6741 transform_pattern_stmt = false;
6744 else
6745 transform_pattern_stmt = false;
6748 if (STMT_VINFO_VECTYPE (stmt_info))
6750 unsigned int nunits
6751 = (unsigned int)
6752 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6753 if (!STMT_SLP_TYPE (stmt_info)
6754 && nunits != (unsigned int) vectorization_factor
6755 && dump_enabled_p ())
6756 /* For SLP VF is set according to unrolling factor, and not
6757 to vector size, hence for SLP this print is not valid. */
6758 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6761 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6762 reached. */
6763 if (STMT_SLP_TYPE (stmt_info))
6765 if (!slp_scheduled)
6767 slp_scheduled = true;
6769 if (dump_enabled_p ())
6770 dump_printf_loc (MSG_NOTE, vect_location,
6771 "=== scheduling SLP instances ===\n");
6773 vect_schedule_slp (loop_vinfo);
6776 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
6777 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6779 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6781 pattern_def_seq = NULL;
6782 gsi_next (&si);
6784 continue;
6788 /* -------- vectorize statement ------------ */
6789 if (dump_enabled_p ())
6790 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6792 grouped_store = false;
6793 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6794 if (is_store)
6796 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6798 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6799 interleaving chain was completed - free all the stores in
6800 the chain. */
6801 gsi_next (&si);
6802 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6804 else
6806 /* Free the attached stmt_vec_info and remove the stmt. */
6807 gimple *store = gsi_stmt (si);
6808 free_stmt_vec_info (store);
6809 unlink_stmt_vdef (store);
6810 gsi_remove (&si, true);
6811 release_defs (store);
6814 /* Stores can only appear at the end of pattern statements. */
6815 gcc_assert (!transform_pattern_stmt);
6816 pattern_def_seq = NULL;
6818 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6820 pattern_def_seq = NULL;
6821 gsi_next (&si);
6823 } /* stmts in BB */
6824 } /* BBs in loop */
6826 slpeel_make_loop_iterate_ntimes (loop, ratio);
6828 /* Reduce loop iterations by the vectorization factor. */
6829 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6830 expected_iterations / vectorization_factor);
6831 loop->nb_iterations_upper_bound
6832 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6833 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6834 && loop->nb_iterations_upper_bound != 0)
6835 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6836 if (loop->any_estimate)
6838 loop->nb_iterations_estimate
6839 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6840 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6841 && loop->nb_iterations_estimate != 0)
6842 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6845 if (dump_enabled_p ())
6847 dump_printf_loc (MSG_NOTE, vect_location,
6848 "LOOP VECTORIZED\n");
6849 if (loop->inner)
6850 dump_printf_loc (MSG_NOTE, vect_location,
6851 "OUTER LOOP VECTORIZED\n");
6852 dump_printf (MSG_NOTE, "\n");