1 /* Loop Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "optabs-tree.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "cfganal.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "cfgloop.h"
45 #include "params.h"
46 #include "tree-scalar-evolution.h"
47 #include "tree-vectorizer.h"
48 #include "gimple-fold.h"
49 #include "cgraph.h"
51 /* Loop Vectorization Pass.
53 This pass tries to vectorize loops.
55 For example, the vectorizer transforms the following simple loop:
57 short a[N]; short b[N]; short c[N]; int i;
59 for (i=0; i<N; i++){
60 a[i] = b[i] + c[i];
   }
63 as if it was manually vectorized by rewriting the source code into:
65 typedef int __attribute__((mode(V8HI))) v8hi;
66 short a[N]; short b[N]; short c[N]; int i;
67 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
68 v8hi va, vb, vc;
70 for (i=0; i<N/8; i++){
71 vb = pb[i];
72 vc = pc[i];
73 va = vb + vc;
74 pa[i] = va;
   }
77 The main entry to this pass is vectorize_loops(), in which
78 the vectorizer applies a set of analyses on a given set of loops,
79 followed by the actual vectorization transformation for the loops that
80 had successfully passed the analysis phase.
81 Throughout this pass we make a distinction between two types of
82 data: scalars (which are represented by SSA_NAMES), and memory references
83 ("data-refs"). These two types of data require different handling both
84 during analysis and transformation. The types of data-refs that the
85 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
86 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
87 accesses are required to have a simple (consecutive) access pattern.
89 Analysis phase:
90 ===============
91 The driver for the analysis phase is vect_analyze_loop().
92 It applies a set of analyses, some of which rely on the scalar evolution
93 analyzer (scev) developed by Sebastian Pop.
95 During the analysis phase the vectorizer records some information
96 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
97 loop, as well as general information about the loop as a whole, which is
98 recorded in a "loop_vec_info" struct attached to each loop.
100 Transformation phase:
101 =====================
102 The loop transformation phase scans all the stmts in the loop, and
103 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
104 the loop that needs to be vectorized. It inserts the vector code sequence
105 just before the scalar stmt S, and records a pointer to the vector code
106 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
107 attached to S). This pointer will be used for the vectorization of following
108 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
109 otherwise, we rely on dead code elimination for removing it.
111 For example, say stmt S1 was vectorized into stmt VS1:
113 VS1: vb = px[i];
114 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
115 S2: a = b;
117 To vectorize stmt S2, the vectorizer first finds the stmt that defines
118 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
119 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
120 resulting sequence would be:
122 VS1: vb = px[i];
123 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
124 VS2: va = vb;
125 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
127 Operands that are not SSA_NAMEs are data-refs that appear in
128 load/store operations (like 'x[i]' in S1) and are handled differently.
130 Target modeling:
131 =================
132 Currently the only target specific information that is used is the
133 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
134 Targets that can support different vector sizes will, for now, need
135 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
136 flexibility will be added in the future.
138 Since we only vectorize operations whose vector form can be
139 expressed using existing tree codes, to verify that an operation is
140 supported, the vectorizer checks the relevant optab at the relevant
141 machine_mode (e.g., optab_handler (add_optab, V8HImode)). If
142 the value found is CODE_FOR_nothing, then there's no target support, and
143 we can't vectorize the stmt.
145 For additional information on this project see:
146 http://gcc.gnu.org/projects/tree-ssa/vectorization.html */
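/* A minimal sketch of the optab query described above, assuming the
   statement is an addition on V8HImode vectors; CODE_FOR_nothing means
   there is no target support and the stmt cannot be vectorized:

     if (optab_handler (add_optab, V8HImode) == CODE_FOR_nothing)
       return false;  */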
149 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
151 /* Function vect_determine_vectorization_factor
153 Determine the vectorization factor (VF). VF is the number of data elements
154 that are operated upon in parallel in a single iteration of the vectorized
155 loop. For example, when vectorizing a loop that operates on 4-byte elements,
156 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
157 elements can fit in a single vector register.
159 We currently support vectorization of loops in which all types operated upon
160 are of the same size. Therefore this function currently sets VF according to
161 the size of the types operated upon, and fails if there are multiple sizes
162 in the loop.
164 VF is also the factor by which the loop iterations are strip-mined, e.g.:
165 original loop:
166 for (i=0; i<N; i++){
167 a[i] = b[i] + c[i];
    }
170 vectorized loop:
171 for (i=0; i<N; i+=VF){
172 a[i:VF] = b[i:VF] + c[i:VF];
    }
*/
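/* A short worked example, assuming a 16-byte vector size: for a loop that
   operates on 2-byte shorts the chosen vectype has TYPE_VECTOR_SUBPARTS == 8,
   so VF is set to 8 and the strip-mined loop advances by 8 elements:

     for (i=0; i<N; i+=8)
       a[i:8] = b[i:8] + c[i:8];  */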
176 static bool
177 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
180 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
181 unsigned nbbs = loop->num_nodes;
182 unsigned int vectorization_factor = 0;
183 tree scalar_type;
184 gphi *phi;
185 tree vectype;
186 unsigned int nunits;
187 stmt_vec_info stmt_info;
188 unsigned i;
189 HOST_WIDE_INT dummy;
190 gimple *stmt, *pattern_stmt = NULL;
191 gimple_seq pattern_def_seq = NULL;
192 gimple_stmt_iterator pattern_def_si = gsi_none ();
193 bool analyze_pattern_stmt = false;
194 bool bool_result;
195 auto_vec<stmt_vec_info> mask_producers;
197 if (dump_enabled_p ())
198 dump_printf_loc (MSG_NOTE, vect_location,
199 "=== vect_determine_vectorization_factor ===\n");
201 for (i = 0; i < nbbs; i++)
203 basic_block bb = bbs[i];
205 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
206 gsi_next (&si))
208 phi = si.phi ();
209 stmt_info = vinfo_for_stmt (phi);
210 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
213 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
214 dump_printf (MSG_NOTE, "\n");
217 gcc_assert (stmt_info);
219 if (STMT_VINFO_RELEVANT_P (stmt_info))
221 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
222 scalar_type = TREE_TYPE (PHI_RESULT (phi));
224 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "get vectype for scalar type: ");
228 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
229 dump_printf (MSG_NOTE, "\n");
232 vectype = get_vectype_for_scalar_type (scalar_type);
233 if (!vectype)
235 if (dump_enabled_p ())
237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
238 "not vectorized: unsupported "
239 "data-type ");
240 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
241 scalar_type);
242 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
244 return false;
246 STMT_VINFO_VECTYPE (stmt_info) = vectype;
248 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
251 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
252 dump_printf (MSG_NOTE, "\n");
255 nunits = TYPE_VECTOR_SUBPARTS (vectype);
256 if (dump_enabled_p ())
257 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
258 nunits);
260 if (!vectorization_factor
261 || (nunits > vectorization_factor))
262 vectorization_factor = nunits;
266 for (gimple_stmt_iterator si = gsi_start_bb (bb);
267 !gsi_end_p (si) || analyze_pattern_stmt;)
269 tree vf_vectype;
271 if (analyze_pattern_stmt)
272 stmt = pattern_stmt;
273 else
274 stmt = gsi_stmt (si);
276 stmt_info = vinfo_for_stmt (stmt);
278 if (dump_enabled_p ())
280 dump_printf_loc (MSG_NOTE, vect_location,
281 "==> examining statement: ");
282 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
283 dump_printf (MSG_NOTE, "\n");
286 gcc_assert (stmt_info);
288 /* Skip stmts which do not need to be vectorized. */
289 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
290 && !STMT_VINFO_LIVE_P (stmt_info))
291 || gimple_clobber_p (stmt))
293 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
294 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
295 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
296 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
298 stmt = pattern_stmt;
299 stmt_info = vinfo_for_stmt (pattern_stmt);
300 if (dump_enabled_p ())
302 dump_printf_loc (MSG_NOTE, vect_location,
303 "==> examining pattern statement: ");
304 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
305 dump_printf (MSG_NOTE, "\n");
308 else
310 if (dump_enabled_p ())
311 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
312 gsi_next (&si);
313 continue;
316 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
317 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
320 analyze_pattern_stmt = true;
322 /* If a pattern statement has def stmts, analyze them too. */
323 if (is_pattern_stmt_p (stmt_info))
325 if (pattern_def_seq == NULL)
327 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
328 pattern_def_si = gsi_start (pattern_def_seq);
330 else if (!gsi_end_p (pattern_def_si))
331 gsi_next (&pattern_def_si);
332 if (pattern_def_seq != NULL)
334 gimple *pattern_def_stmt = NULL;
335 stmt_vec_info pattern_def_stmt_info = NULL;
337 while (!gsi_end_p (pattern_def_si))
339 pattern_def_stmt = gsi_stmt (pattern_def_si);
340 pattern_def_stmt_info
341 = vinfo_for_stmt (pattern_def_stmt);
342 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
343 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
344 break;
345 gsi_next (&pattern_def_si);
348 if (!gsi_end_p (pattern_def_si))
350 if (dump_enabled_p ())
352 dump_printf_loc (MSG_NOTE, vect_location,
353 "==> examining pattern def stmt: ");
354 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
355 pattern_def_stmt, 0);
356 dump_printf (MSG_NOTE, "\n");
359 stmt = pattern_def_stmt;
360 stmt_info = pattern_def_stmt_info;
362 else
364 pattern_def_si = gsi_none ();
365 analyze_pattern_stmt = false;
368 else
369 analyze_pattern_stmt = false;
372 if (gimple_get_lhs (stmt) == NULL_TREE
373 /* MASK_STORE has no lhs, but is ok. */
374 && (!is_gimple_call (stmt)
375 || !gimple_call_internal_p (stmt)
376 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
378 if (is_gimple_call (stmt))
380 /* Ignore calls with no lhs. These must be calls to
381 #pragma omp simd functions; the vectorization factor
382 they really need can't be determined until
383 vectorizable_simd_clone_call. */
384 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
386 pattern_def_seq = NULL;
387 gsi_next (&si);
389 continue;
391 if (dump_enabled_p ())
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "not vectorized: irregular stmt.");
395 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
397 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
399 return false;
402 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
404 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
407 "not vectorized: vector stmt in loop:");
408 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
409 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
411 return false;
414 bool_result = false;
416 if (STMT_VINFO_VECTYPE (stmt_info))
418 /* The only case in which a vectype has already been set is for stmts
419 that contain a dataref, or for "pattern-stmts" (stmts
420 generated by the vectorizer to represent/replace a certain
421 idiom). */
422 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
423 || is_pattern_stmt_p (stmt_info)
424 || !gsi_end_p (pattern_def_si));
425 vectype = STMT_VINFO_VECTYPE (stmt_info);
427 else
429 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
430 if (is_gimple_call (stmt)
431 && gimple_call_internal_p (stmt)
432 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
433 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
434 else
435 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
437 /* Bool ops don't participate in vectorization factor
438 computation. For comparisons, use the compared types to
439 compute a factor. */
440 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
442 if (STMT_VINFO_RELEVANT_P (stmt_info))
443 mask_producers.safe_push (stmt_info);
444 bool_result = true;
446 if (gimple_code (stmt) == GIMPLE_ASSIGN
447 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
448 == tcc_comparison
449 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
450 != BOOLEAN_TYPE)
451 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
452 else
454 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
456 pattern_def_seq = NULL;
457 gsi_next (&si);
459 continue;
463 if (dump_enabled_p ())
465 dump_printf_loc (MSG_NOTE, vect_location,
466 "get vectype for scalar type: ");
467 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
468 dump_printf (MSG_NOTE, "\n");
470 vectype = get_vectype_for_scalar_type (scalar_type);
471 if (!vectype)
473 if (dump_enabled_p ())
475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
476 "not vectorized: unsupported "
477 "data-type ");
478 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
479 scalar_type);
480 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
482 return false;
485 if (!bool_result)
486 STMT_VINFO_VECTYPE (stmt_info) = vectype;
488 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
491 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
492 dump_printf (MSG_NOTE, "\n");
496 /* Don't try to compute the VF from scalar types if the stmt
497 produces a boolean vector. Use the result vectype instead. */
498 if (VECTOR_BOOLEAN_TYPE_P (vectype))
499 vf_vectype = vectype;
500 else
502 /* The vectorization factor is determined by the smallest
503 scalar type (or the largest vector size, but we only
504 support one vector size per loop). */
505 if (!bool_result)
506 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
507 &dummy);
508 if (dump_enabled_p ())
510 dump_printf_loc (MSG_NOTE, vect_location,
511 "get vectype for scalar type: ");
512 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
513 dump_printf (MSG_NOTE, "\n");
515 vf_vectype = get_vectype_for_scalar_type (scalar_type);
517 if (!vf_vectype)
519 if (dump_enabled_p ())
521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
522 "not vectorized: unsupported data-type ");
523 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
524 scalar_type);
525 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
527 return false;
530 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
531 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
533 if (dump_enabled_p ())
535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
536 "not vectorized: different sized vector "
537 "types in statement, ");
538 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
539 vectype);
540 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
541 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
542 vf_vectype);
543 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
545 return false;
548 if (dump_enabled_p ())
550 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
551 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
552 dump_printf (MSG_NOTE, "\n");
555 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
556 if (dump_enabled_p ())
557 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
558 if (!vectorization_factor
559 || (nunits > vectorization_factor))
560 vectorization_factor = nunits;
562 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
564 pattern_def_seq = NULL;
565 gsi_next (&si);
570 /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
573 vectorization_factor);
574 if (vectorization_factor <= 1)
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
578 "not vectorized: unsupported data-type\n");
579 return false;
581 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
583 for (i = 0; i < mask_producers.length (); i++)
585 tree mask_type = NULL;
587 stmt = STMT_VINFO_STMT (mask_producers[i]);
589 if (gimple_code (stmt) == GIMPLE_ASSIGN
590 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
591 && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
593 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
594 mask_type = get_mask_type_for_scalar_type (scalar_type);
596 if (!mask_type)
598 if (dump_enabled_p ())
599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
600 "not vectorized: unsupported mask\n");
601 return false;
604 else
606 tree rhs;
607 ssa_op_iter iter;
608 gimple *def_stmt;
609 enum vect_def_type dt;
611 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
613 if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
614 &def_stmt, &dt, &vectype))
616 if (dump_enabled_p ())
618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
619 "not vectorized: can't compute mask type "
620 "for statement, ");
621 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
623 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
625 return false;
628 /* No vectype probably means external definition.
629 Allow it in case there is another operand from which
630 the mask type can be determined. */
631 if (!vectype)
632 continue;
634 if (!mask_type)
635 mask_type = vectype;
636 else if (TYPE_VECTOR_SUBPARTS (mask_type)
637 != TYPE_VECTOR_SUBPARTS (vectype))
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "not vectorized: different sized masks "
643 "types in statement, ");
644 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
645 mask_type);
646 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
647 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
648 vectype);
649 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
651 return false;
653 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
654 != VECTOR_BOOLEAN_TYPE_P (vectype))
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
659 "not vectorized: mixed mask and "
660 "nonmask vector types in statement, ");
661 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
662 mask_type);
663 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
664 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
665 vectype);
666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
668 return false;
672 /* We may compare a boolean value loaded as a vector of integers.
673 Fix mask_type in such a case. */
674 if (mask_type
675 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
676 && gimple_code (stmt) == GIMPLE_ASSIGN
677 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
678 mask_type = build_same_sized_truth_vector_type (mask_type);
681 /* No mask_type should mean a loop-invariant predicate.
682 This is probably a subject for optimization in
683 if-conversion. */
684 if (!mask_type)
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "not vectorized: can't compute mask type "
690 "for statement, ");
691 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
693 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
695 return false;
698 STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
701 return true;
705 /* Function vect_is_simple_iv_evolution.
707 FORNOW: A simple evolution of an induction variable in the loop is
708 considered a polynomial evolution. */
710 static bool
711 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
712 tree * step)
714 tree init_expr;
715 tree step_expr;
716 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
717 basic_block bb;
719 /* When there is no evolution in this loop, the evolution function
720 is not "simple". */
721 if (evolution_part == NULL_TREE)
722 return false;
724 /* When the evolution is a polynomial of degree >= 2
725 the evolution function is not "simple". */
726 if (tree_is_chrec (evolution_part))
727 return false;
729 step_expr = evolution_part;
730 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
732 if (dump_enabled_p ())
734 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
735 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
736 dump_printf (MSG_NOTE, ", init: ");
737 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
738 dump_printf (MSG_NOTE, "\n");
741 *init = init_expr;
742 *step = step_expr;
744 if (TREE_CODE (step_expr) != INTEGER_CST
745 && (TREE_CODE (step_expr) != SSA_NAME
746 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
747 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
748 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
749 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
750 || !flag_associative_math)))
751 && (TREE_CODE (step_expr) != REAL_CST
752 || !flag_associative_math))
754 if (dump_enabled_p ())
755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
756 "step unknown.\n");
757 return false;
760 return true;
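/* An illustrative example of the evolutions accepted above, using the usual
   scev chrec notation {init, +, step}_loopnum:

     for (i = 0; i < n; i++)      access_fn of i is {0, +, 1}_1
       sum = sum + a[i];

   The evolution part is the constant step 1, so *INIT = 0 and *STEP = 1; an
   evolution whose step is itself a chrec (a polynomial of degree >= 2) is
   rejected.  */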
763 /* Function vect_analyze_scalar_cycles_1.
765 Examine the cross iteration def-use cycles of scalar variables
766 in LOOP. LOOP_VINFO represents the loop that is now being
767 considered for vectorization (can be LOOP, or an outer-loop
768 enclosing LOOP). */
770 static void
771 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
773 basic_block bb = loop->header;
774 tree init, step;
775 auto_vec<gimple *, 64> worklist;
776 gphi_iterator gsi;
777 bool double_reduc;
779 if (dump_enabled_p ())
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "=== vect_analyze_scalar_cycles ===\n");
783 /* First - identify all inductions. Reduction detection assumes that all the
784 inductions have been identified; therefore, this order must not be
785 changed. */
786 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
788 gphi *phi = gsi.phi ();
789 tree access_fn = NULL;
790 tree def = PHI_RESULT (phi);
791 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
793 if (dump_enabled_p ())
795 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
796 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
797 dump_printf (MSG_NOTE, "\n");
800 /* Skip virtual phi's. The data dependences that are associated with
801 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
802 if (virtual_operand_p (def))
803 continue;
805 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
807 /* Analyze the evolution function. */
808 access_fn = analyze_scalar_evolution (loop, def);
809 if (access_fn)
811 STRIP_NOPS (access_fn);
812 if (dump_enabled_p ())
814 dump_printf_loc (MSG_NOTE, vect_location,
815 "Access function of PHI: ");
816 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
817 dump_printf (MSG_NOTE, "\n");
819 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
820 = initial_condition_in_loop_num (access_fn, loop->num);
821 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
822 = evolution_part_in_loop_num (access_fn, loop->num);
825 if (!access_fn
826 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
827 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
828 && TREE_CODE (step) != INTEGER_CST))
830 worklist.safe_push (phi);
831 continue;
834 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
835 != NULL_TREE);
836 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
838 if (dump_enabled_p ())
839 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
840 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
844 /* Second - identify all reductions and nested cycles. */
845 while (worklist.length () > 0)
847 gimple *phi = worklist.pop ();
848 tree def = PHI_RESULT (phi);
849 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
850 gimple *reduc_stmt;
851 bool nested_cycle;
853 if (dump_enabled_p ())
855 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
856 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
857 dump_printf (MSG_NOTE, "\n");
860 gcc_assert (!virtual_operand_p (def)
861 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
863 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
864 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
865 &double_reduc, false);
866 if (reduc_stmt)
868 if (double_reduc)
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "Detected double reduction.\n");
874 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
875 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
876 vect_double_reduction_def;
878 else
880 if (nested_cycle)
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_NOTE, vect_location,
884 "Detected vectorizable nested cycle.\n");
886 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
887 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
888 vect_nested_cycle;
890 else
892 if (dump_enabled_p ())
893 dump_printf_loc (MSG_NOTE, vect_location,
894 "Detected reduction.\n");
896 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
897 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
898 vect_reduction_def;
899 /* Store the reduction cycles for possible vectorization in
900 loop-aware SLP. */
901 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
905 else
906 if (dump_enabled_p ())
907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
908 "Unknown def-use cycle pattern.\n");
913 /* Function vect_analyze_scalar_cycles.
915 Examine the cross iteration def-use cycles of scalar variables, by
916 analyzing the loop-header PHIs of scalar variables. Classify each
917 cycle as one of the following: invariant, induction, reduction, unknown.
918 We do that for the loop represented by LOOP_VINFO, and also for its
919 inner-loop, if it exists.
920 Examples for scalar cycles:
922 Example1: reduction:
924 loop1:
925 for (i=0; i<N; i++)
926 sum += a[i];
928 Example2: induction:
930 loop2:
931 for (i=0; i<N; i++)
932 a[i] = i; */
934 static void
935 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
937 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
939 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
941 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
942 Reductions in such an inner-loop therefore have different properties than
943 the reductions in the nest that gets vectorized:
944 1. When vectorized, they are executed in the same order as in the original
945 scalar loop, so we can't change the order of computation when
946 vectorizing them.
947 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
948 current checks are too strict. */
950 if (loop->inner)
951 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
954 /* Transfer group and reduction information from STMT to its pattern stmt. */
956 static void
957 vect_fixup_reduc_chain (gimple *stmt)
959 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
960 gimple *stmtp;
961 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
962 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
963 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
966 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
967 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
968 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
969 if (stmt)
970 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
971 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
973 while (stmt);
974 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
977 /* Fixup scalar cycles that now have their stmts detected as patterns. */
979 static void
980 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
982 gimple *first;
983 unsigned i;
985 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
986 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
988 vect_fixup_reduc_chain (first);
989 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
990 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
994 /* Function vect_get_loop_niters.
996 Determine how many iterations the loop executes and place that
997 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
998 in NUMBER_OF_ITERATIONSM1.
1000 Return the loop exit condition. */
1003 static gcond *
1004 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
1005 tree *number_of_iterationsm1)
1007 tree niters;
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "=== get_loop_niters ===\n");
1013 niters = number_of_latch_executions (loop);
1014 *number_of_iterationsm1 = niters;
1016 /* We want the number of loop header executions which is the number
1017 of latch executions plus one.
1018 ??? For UINT_MAX latch executions this number overflows to zero
1019 for loops like do { n++; } while (n != 0); */
1020 if (niters && !chrec_contains_undetermined (niters))
1021 niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
1022 build_int_cst (TREE_TYPE (niters), 1));
1023 *number_of_iterations = niters;
1025 return get_loop_exit_condition (loop);
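/* For illustration: for a countable loop whose body executes 16 times,
   number_of_latch_executions returns 15, so the function above sets
   *NUMBER_OF_ITERATIONSM1 = 15 and *NUMBER_OF_ITERATIONS = 15 + 1 = 16
   (modulo the UINT_MAX overflow caveat noted in the code).  */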
1029 /* Function bb_in_loop_p
1031 Used as predicate for dfs order traversal of the loop bbs. */
1033 static bool
1034 bb_in_loop_p (const_basic_block bb, const void *data)
1036 const struct loop *const loop = (const struct loop *)data;
1037 if (flow_bb_inside_loop_p (loop, bb))
1038 return true;
1039 return false;
1043 /* Function new_loop_vec_info.
1045 Create and initialize a new loop_vec_info struct for LOOP, as well as
1046 stmt_vec_info structs for all the stmts in LOOP. */
1048 static loop_vec_info
1049 new_loop_vec_info (struct loop *loop)
1051 loop_vec_info res;
1052 basic_block *bbs;
1053 gimple_stmt_iterator si;
1054 unsigned int i, nbbs;
1056 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1057 res->kind = vec_info::loop;
1058 LOOP_VINFO_LOOP (res) = loop;
1060 bbs = get_loop_body (loop);
1062 /* Create/Update stmt_info for all stmts in the loop. */
1063 for (i = 0; i < loop->num_nodes; i++)
1065 basic_block bb = bbs[i];
1067 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1069 gimple *phi = gsi_stmt (si);
1070 gimple_set_uid (phi, 0);
1071 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1074 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1076 gimple *stmt = gsi_stmt (si);
1077 gimple_set_uid (stmt, 0);
1078 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1082 /* CHECKME: We want to visit all BBs before their successors (except for
1083 latch blocks, for which this assertion wouldn't hold). In the simple
1084 case of the loop forms we allow, a dfs order of the BBs would be the same
1085 as reversed postorder traversal, so we are safe. */
1087 free (bbs);
1088 bbs = XCNEWVEC (basic_block, loop->num_nodes);
1089 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1090 bbs, loop->num_nodes, loop);
1091 gcc_assert (nbbs == loop->num_nodes);
1093 LOOP_VINFO_BBS (res) = bbs;
1094 LOOP_VINFO_NITERSM1 (res) = NULL;
1095 LOOP_VINFO_NITERS (res) = NULL;
1096 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1097 LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1098 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1099 LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1100 LOOP_VINFO_VECT_FACTOR (res) = 0;
1101 LOOP_VINFO_LOOP_NEST (res) = vNULL;
1102 LOOP_VINFO_DATAREFS (res) = vNULL;
1103 LOOP_VINFO_DDRS (res) = vNULL;
1104 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1105 LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1106 LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1107 LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1108 LOOP_VINFO_REDUCTIONS (res) = vNULL;
1109 LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1110 LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1111 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1112 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1113 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1114 LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1115 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1117 return res;
1121 /* Function destroy_loop_vec_info.
1123 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1124 stmts in the loop. */
1126 void
1127 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1129 struct loop *loop;
1130 basic_block *bbs;
1131 int nbbs;
1132 gimple_stmt_iterator si;
1133 int j;
1134 vec<slp_instance> slp_instances;
1135 slp_instance instance;
1136 bool swapped;
1138 if (!loop_vinfo)
1139 return;
1141 loop = LOOP_VINFO_LOOP (loop_vinfo);
1143 bbs = LOOP_VINFO_BBS (loop_vinfo);
1144 nbbs = clean_stmts ? loop->num_nodes : 0;
1145 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1147 for (j = 0; j < nbbs; j++)
1149 basic_block bb = bbs[j];
1150 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1151 free_stmt_vec_info (gsi_stmt (si));
1153 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1155 gimple *stmt = gsi_stmt (si);
1157 /* We may have broken canonical form by moving a constant
1158 into RHS1 of a commutative op. Fix such occurrences. */
1159 if (swapped && is_gimple_assign (stmt))
1161 enum tree_code code = gimple_assign_rhs_code (stmt);
1163 if ((code == PLUS_EXPR
1164 || code == POINTER_PLUS_EXPR
1165 || code == MULT_EXPR)
1166 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1167 swap_ssa_operands (stmt,
1168 gimple_assign_rhs1_ptr (stmt),
1169 gimple_assign_rhs2_ptr (stmt));
1172 /* Free stmt_vec_info. */
1173 free_stmt_vec_info (stmt);
1174 gsi_next (&si);
1178 free (LOOP_VINFO_BBS (loop_vinfo));
1179 vect_destroy_datarefs (loop_vinfo);
1180 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1181 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1182 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1183 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
1184 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1185 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1186 FOR_EACH_VEC_ELT (slp_instances, j, instance)
1187 vect_free_slp_instance (instance);
1189 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1190 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1191 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1192 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1194 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1195 loop_vinfo->scalar_cost_vec.release ();
1197 free (loop_vinfo);
1198 loop->aux = NULL;
1202 /* Calculate the cost of one scalar iteration of the loop. */
1203 static void
1204 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1206 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1208 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1209 int innerloop_iters, i;
1211 /* Count statements in the scalar loop. Use this as the scalar cost of a single
1212 iteration for now.
1214 TODO: Add outer loop support.
1216 TODO: Consider assigning different costs to different scalar
1217 statements. */
1219 /* FORNOW. */
1220 innerloop_iters = 1;
1221 if (loop->inner)
1222 innerloop_iters = 50; /* FIXME */
1224 for (i = 0; i < nbbs; i++)
1226 gimple_stmt_iterator si;
1227 basic_block bb = bbs[i];
1229 if (bb->loop_father == loop->inner)
1230 factor = innerloop_iters;
1231 else
1232 factor = 1;
1234 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1236 gimple *stmt = gsi_stmt (si);
1237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1239 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1240 continue;
1242 /* Skip stmts that are not vectorized inside the loop. */
1243 if (stmt_info
1244 && !STMT_VINFO_RELEVANT_P (stmt_info)
1245 && (!STMT_VINFO_LIVE_P (stmt_info)
1246 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1247 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1248 continue;
1250 vect_cost_for_stmt kind;
1251 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1253 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1254 kind = scalar_load;
1255 else
1256 kind = scalar_store;
1258 else
1259 kind = scalar_stmt;
1261 scalar_single_iter_cost
1262 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1263 factor, kind, NULL, 0, vect_prologue);
1266 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1267 = scalar_single_iter_cost;
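/* For illustration, assuming a single-block loop body (factor == 1) such as

     t1 = b[i];        recorded as scalar_load
     t2 = t1 + 5;      recorded as scalar_stmt
     a[i] = t2;        recorded as scalar_store

   each statement is recorded once via record_stmt_cost; statements in a
   contained inner loop would instead be recorded with factor == 50.  */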
1271 /* Function vect_analyze_loop_form_1.
1273 Verify that certain CFG restrictions hold, including:
1274 - the loop has a pre-header
1275 - the loop has a single entry and exit
1276 - the loop exit condition is simple enough, and the number of iterations
1277 can be analyzed (a countable loop). */
1279 bool
1280 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1281 tree *number_of_iterationsm1,
1282 tree *number_of_iterations, gcond **inner_loop_cond)
1284 if (dump_enabled_p ())
1285 dump_printf_loc (MSG_NOTE, vect_location,
1286 "=== vect_analyze_loop_form ===\n");
1288 /* Different restrictions apply when we are considering an inner-most loop,
1289 vs. an outer (nested) loop.
1290 (FORNOW. May want to relax some of these restrictions in the future). */
1292 if (!loop->inner)
1294 /* Inner-most loop. We currently require that the number of BBs is
1295 exactly 2 (the header and latch). Vectorizable inner-most loops
1296 look like this:
1298 (pre-header)
1300 header <--------+
1301 | | |
1302 | +--> latch --+
1304 (exit-bb) */
1306 if (loop->num_nodes != 2)
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1310 "not vectorized: control flow in loop.\n");
1311 return false;
1314 if (empty_block_p (loop->header))
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1318 "not vectorized: empty loop.\n");
1319 return false;
1322 else
1324 struct loop *innerloop = loop->inner;
1325 edge entryedge;
1327 /* Nested loop. We currently require that the loop is doubly-nested,
1328 contains a single inner loop, and the number of BBs is exactly 5.
1329 Vectorizable outer-loops look like this:
1331 (pre-header)
1333 header <---+
1335 inner-loop |
1337 tail ------+
1339 (exit-bb)
1341 The inner-loop has the properties expected of inner-most loops
1342 as described above. */
1344 if ((loop->inner)->inner || (loop->inner)->next)
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1348 "not vectorized: multiple nested loops.\n");
1349 return false;
1352 if (loop->num_nodes != 5)
1354 if (dump_enabled_p ())
1355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1356 "not vectorized: control flow in loop.\n");
1357 return false;
1360 entryedge = loop_preheader_edge (innerloop);
1361 if (entryedge->src != loop->header
1362 || !single_exit (innerloop)
1363 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1365 if (dump_enabled_p ())
1366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1367 "not vectorized: unsupported outerloop form.\n");
1368 return false;
1371 /* Analyze the inner-loop. */
1372 tree inner_niterm1, inner_niter;
1373 if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1374 &inner_niterm1, &inner_niter, NULL))
1376 if (dump_enabled_p ())
1377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1378 "not vectorized: Bad inner loop.\n");
1379 return false;
1382 if (!expr_invariant_in_loop_p (loop, inner_niter))
1384 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1386 "not vectorized: inner-loop count not"
1387 " invariant.\n");
1388 return false;
1391 if (dump_enabled_p ())
1392 dump_printf_loc (MSG_NOTE, vect_location,
1393 "Considering outer-loop vectorization.\n");
1396 if (!single_exit (loop)
1397 || EDGE_COUNT (loop->header->preds) != 2)
1399 if (dump_enabled_p ())
1401 if (!single_exit (loop))
1402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1403 "not vectorized: multiple exits.\n");
1404 else if (EDGE_COUNT (loop->header->preds) != 2)
1405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1406 "not vectorized: too many incoming edges.\n");
1408 return false;
1411 /* We assume that the loop exit condition is at the end of the loop, i.e.,
1412 that the loop is represented as a do-while (with a proper if-guard
1413 before the loop if needed), where the loop header contains all the
1414 executable statements, and the latch is empty. */
1415 if (!empty_block_p (loop->latch)
1416 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1418 if (dump_enabled_p ())
1419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1420 "not vectorized: latch block not empty.\n");
1421 return false;
1424 /* Make sure there exists a single-predecessor exit bb: */
1425 if (!single_pred_p (single_exit (loop)->dest))
1427 edge e = single_exit (loop);
1428 if (!(e->flags & EDGE_ABNORMAL))
1430 split_loop_exit_edge (e);
1431 if (dump_enabled_p ())
1432 dump_printf (MSG_NOTE, "split exit edge.\n");
1434 else
1436 if (dump_enabled_p ())
1437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1438 "not vectorized: abnormal loop exit edge.\n");
1439 return false;
1443 *loop_cond = vect_get_loop_niters (loop, number_of_iterations,
1444 number_of_iterationsm1);
1445 if (!*loop_cond)
1447 if (dump_enabled_p ())
1448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1449 "not vectorized: complicated exit condition.\n");
1450 return false;
1453 if (!*number_of_iterations
1454 || chrec_contains_undetermined (*number_of_iterations))
1456 if (dump_enabled_p ())
1457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1458 "not vectorized: number of iterations cannot be "
1459 "computed.\n");
1460 return false;
1463 if (integer_zerop (*number_of_iterations))
1465 if (dump_enabled_p ())
1466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1467 "not vectorized: number of iterations = 0.\n");
1468 return false;
1471 return true;
1474 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
1476 loop_vec_info
1477 vect_analyze_loop_form (struct loop *loop)
1479 tree number_of_iterations, number_of_iterationsm1;
1480 gcond *loop_cond, *inner_loop_cond = NULL;
1482 if (! vect_analyze_loop_form_1 (loop, &loop_cond, &number_of_iterationsm1,
1483 &number_of_iterations, &inner_loop_cond))
1484 return NULL;
1486 loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1487 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1488 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1489 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1491 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1493 if (dump_enabled_p ())
1495 dump_printf_loc (MSG_NOTE, vect_location,
1496 "Symbolic number of iterations is ");
1497 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1498 dump_printf (MSG_NOTE, "\n");
1502 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1503 if (inner_loop_cond)
1504 STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1505 = loop_exit_ctrl_vec_info_type;
1507 gcc_assert (!loop->aux);
1508 loop->aux = loop_vinfo;
1509 return loop_vinfo;
1514 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
1515 statements, update the vectorization factor. */
1517 static void
1518 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1521 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1522 int nbbs = loop->num_nodes;
1523 unsigned int vectorization_factor;
1524 int i;
1526 if (dump_enabled_p ())
1527 dump_printf_loc (MSG_NOTE, vect_location,
1528 "=== vect_update_vf_for_slp ===\n");
1530 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1531 gcc_assert (vectorization_factor != 0);
1533 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1534 the vectorization factor of the loop is the unrolling factor required by
1535 the SLP instances. If that unrolling factor is 1, we say that we
1536 perform pure SLP on the loop - cross-iteration parallelism is not
1537 exploited. */
1538 bool only_slp_in_loop = true;
1539 for (i = 0; i < nbbs; i++)
1541 basic_block bb = bbs[i];
1542 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1543 gsi_next (&si))
1545 gimple *stmt = gsi_stmt (si);
1546 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1547 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1548 && STMT_VINFO_RELATED_STMT (stmt_info))
1550 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1551 stmt_info = vinfo_for_stmt (stmt);
1553 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1554 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1555 && !PURE_SLP_STMT (stmt_info))
1556 /* STMT needs both SLP and loop-based vectorization. */
1557 only_slp_in_loop = false;
1561 if (only_slp_in_loop)
1562 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1563 else
1564 vectorization_factor
1565 = least_common_multiple (vectorization_factor,
1566 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1568 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1569 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE, vect_location,
1571 "Updating vectorization factor to %d\n",
1572 vectorization_factor);
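/* For illustration, with hypothetical values: a loop VF of 4 and an SLP
   unrolling factor of 2 keep a hybrid loop at VF = lcm (4, 2) = 4, while an
   unrolling factor of 3 would raise it to lcm (4, 3) = 12; if every stmt is
   pure SLP, VF simply becomes the SLP unrolling factor itself.  */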
1575 /* Function vect_analyze_loop_operations.
1577 Scan the loop stmts and make sure they are all vectorizable. */
1579 static bool
1580 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1582 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1583 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1584 int nbbs = loop->num_nodes;
1585 int i;
1586 stmt_vec_info stmt_info;
1587 bool need_to_vectorize = false;
1588 bool ok;
1590 if (dump_enabled_p ())
1591 dump_printf_loc (MSG_NOTE, vect_location,
1592 "=== vect_analyze_loop_operations ===\n");
1594 for (i = 0; i < nbbs; i++)
1596 basic_block bb = bbs[i];
1598 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1599 gsi_next (&si))
1601 gphi *phi = si.phi ();
1602 ok = true;
1604 stmt_info = vinfo_for_stmt (phi);
1605 if (dump_enabled_p ())
1607 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1608 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1609 dump_printf (MSG_NOTE, "\n");
1611 if (virtual_operand_p (gimple_phi_result (phi)))
1612 continue;
1614 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1615 (i.e., a phi in the tail of the outer-loop). */
1616 if (! is_loop_header_bb_p (bb))
1618 /* FORNOW: we currently don't support the case that these phis
1619 are not used in the outerloop (unless it is a double reduction,
1620 i.e., this phi is vect_reduction_def), because this case
1621 requires us to actually do something here. */
1622 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1623 || STMT_VINFO_LIVE_P (stmt_info))
1624 && STMT_VINFO_DEF_TYPE (stmt_info)
1625 != vect_double_reduction_def)
1627 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1629 "Unsupported loop-closed phi in "
1630 "outer-loop.\n");
1631 return false;
1634 /* If PHI is used in the outer loop, we check that its operand
1635 is defined in the inner loop. */
1636 if (STMT_VINFO_RELEVANT_P (stmt_info))
1638 tree phi_op;
1639 gimple *op_def_stmt;
1641 if (gimple_phi_num_args (phi) != 1)
1642 return false;
1644 phi_op = PHI_ARG_DEF (phi, 0);
1645 if (TREE_CODE (phi_op) != SSA_NAME)
1646 return false;
1648 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1649 if (gimple_nop_p (op_def_stmt)
1650 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1651 || !vinfo_for_stmt (op_def_stmt))
1652 return false;
1654 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1655 != vect_used_in_outer
1656 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1657 != vect_used_in_outer_by_reduction)
1658 return false;
1661 continue;
1664 gcc_assert (stmt_info);
1666 if (STMT_VINFO_LIVE_P (stmt_info))
1668 /* FORNOW: not yet supported. */
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1671 "not vectorized: value used after loop.\n");
1672 return false;
1675 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1676 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1678 /* A scalar-dependence cycle that we don't support. */
1679 if (dump_enabled_p ())
1680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1681 "not vectorized: scalar dependence cycle.\n");
1682 return false;
1685 if (STMT_VINFO_RELEVANT_P (stmt_info))
1687 need_to_vectorize = true;
1688 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1689 ok = vectorizable_induction (phi, NULL, NULL);
1692 if (!ok)
1694 if (dump_enabled_p ())
1696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1697 "not vectorized: relevant phi not "
1698 "supported: ");
1699 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1700 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1702 return false;
1706 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1707 gsi_next (&si))
1709 gimple *stmt = gsi_stmt (si);
1710 if (!gimple_clobber_p (stmt)
1711 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1712 return false;
1714 } /* bbs */
1716 /* All operations in the loop are either irrelevant (they deal with loop
1717 control, or are dead), or are only used outside the loop and can be moved
1718 out of the loop (e.g. invariants, inductions). The loop can be
1719 optimized away by scalar optimizations. We're better off not
1720 touching this loop. */
1721 if (!need_to_vectorize)
1723 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "All the computation can be taken out of the loop.\n");
1726 if (dump_enabled_p ())
1727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1728 "not vectorized: redundant loop. no profit to "
1729 "vectorize.\n");
1730 return false;
1733 return true;
1737 /* Function vect_analyze_loop_2.
1739 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1740 for it. The different analyses will record information in the
1741 loop_vec_info struct. */
1742 static bool
1743 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1745 bool ok;
1746 int max_vf = MAX_VECTORIZATION_FACTOR;
1747 int min_vf = 2;
1748 unsigned int n_stmts = 0;
1750 /* The first group of checks is independent of the vector size. */
1751 fatal = true;
1753 /* Find all data references in the loop (which correspond to vdefs/vuses)
1754 and analyze their evolution in the loop. */
1756 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1758 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1759 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1761 if (dump_enabled_p ())
1762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763 "not vectorized: loop contains function calls"
1764 " or data references that cannot be analyzed\n");
1765 return false;
1768 for (unsigned i = 0; i < loop->num_nodes; i++)
1769 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1770 !gsi_end_p (gsi); gsi_next (&gsi))
1772 gimple *stmt = gsi_stmt (gsi);
1773 if (is_gimple_debug (stmt))
1774 continue;
1775 ++n_stmts;
1776 if (!find_data_references_in_stmt (loop, stmt,
1777 &LOOP_VINFO_DATAREFS (loop_vinfo)))
1779 if (is_gimple_call (stmt) && loop->safelen)
1781 tree fndecl = gimple_call_fndecl (stmt), op;
1782 if (fndecl != NULL_TREE)
1784 cgraph_node *node = cgraph_node::get (fndecl);
1785 if (node != NULL && node->simd_clones != NULL)
1787 unsigned int j, n = gimple_call_num_args (stmt);
1788 for (j = 0; j < n; j++)
1790 op = gimple_call_arg (stmt, j);
1791 if (DECL_P (op)
1792 || (REFERENCE_CLASS_P (op)
1793 && get_base_address (op)))
1794 break;
1796 op = gimple_call_lhs (stmt);
1797 /* Ignore #pragma omp declare simd functions
1798 if they don't have data references in the
1799 call stmt itself. */
1800 if (j == n
1801 && !(op
1802 && (DECL_P (op)
1803 || (REFERENCE_CLASS_P (op)
1804 && get_base_address (op)))))
1805 continue;
1809 if (dump_enabled_p ())
1810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1811 "not vectorized: loop contains function "
1812 "calls or data references that cannot "
1813 "be analyzed\n");
1814 return false;
1818 /* Analyze the data references and also adjust the minimal
1819 vectorization factor according to the loads and stores. */
1821 ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1822 if (!ok)
1824 if (dump_enabled_p ())
1825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1826 "bad data references.\n");
1827 return false;
1830 /* Classify all cross-iteration scalar data-flow cycles.
1831 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1832 vect_analyze_scalar_cycles (loop_vinfo);
1834 vect_pattern_recog (loop_vinfo);
1836 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1838 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1839 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1841 ok = vect_analyze_data_ref_accesses (loop_vinfo);
1842 if (!ok)
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "bad data access.\n");
1847 return false;
1850 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1852 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1853 if (!ok)
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857 "unexpected pattern.\n");
1858 return false;
1861 /* The rest of the analysis below depends on the vector size in some way. */
1862 fatal = false;
1864 /* Analyze data dependences between the data-refs in the loop
1865 and adjust the maximum vectorization factor according to
1866 the dependences.
1867 FORNOW: fail at the first data dependence that we encounter. */
1869 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1870 if (!ok
1871 || max_vf < min_vf)
1873 if (dump_enabled_p ())
1874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1875 "bad data dependence.\n");
1876 return false;
1879 ok = vect_determine_vectorization_factor (loop_vinfo);
1880 if (!ok)
1882 if (dump_enabled_p ())
1883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1884 "can't determine vectorization factor.\n");
1885 return false;
1887 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1891 "bad data dependence.\n");
1892 return false;
1895 /* Compute the scalar iteration cost. */
1896 vect_compute_single_scalar_iteration_cost (loop_vinfo);
1898 int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1899 HOST_WIDE_INT estimated_niter;
1900 unsigned th;
1901 int min_scalar_loop_bound;
1903 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1904 ok = vect_analyze_slp (loop_vinfo, n_stmts);
1905 if (!ok)
1906 return false;
1908 /* If there are any SLP instances mark them as pure_slp. */
1909 bool slp = vect_make_slp_decision (loop_vinfo);
1910 if (slp)
1912 /* Find stmts that need to be both vectorized and SLPed. */
1913 vect_detect_hybrid_slp (loop_vinfo);
1915 /* Update the vectorization factor based on the SLP decision. */
1916 vect_update_vf_for_slp (loop_vinfo);
1919 /* This is the point where we can re-start analysis with SLP forced off. */
1920 start_over:
1922 /* Now the vectorization factor is final. */
1923 unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1924 gcc_assert (vectorization_factor != 0);
1926 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1927 dump_printf_loc (MSG_NOTE, vect_location,
1928 "vectorization_factor = %d, niters = "
1929 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1930 LOOP_VINFO_INT_NITERS (loop_vinfo));
1932 HOST_WIDE_INT max_niter
1933 = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1934 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1935 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1936 || (max_niter != -1
1937 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 "not vectorized: iteration count smaller than "
1942 "vectorization factor.\n");
1943 return false;
1946 /* Analyze the alignment of the data-refs in the loop.
1947 Fail if a data reference is found that cannot be vectorized. */
1949 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1950 if (!ok)
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "bad data alignment.\n");
1955 return false;
1958 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1959 It is important to call pruning after vect_analyze_data_ref_accesses,
1960 since we use grouping information gathered by interleaving analysis. */
1961 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1962 if (!ok)
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "number of versioning for alias "
1967 "run-time tests exceeds %d "
1968 "(--param vect-max-version-for-alias-checks)\n",
1969 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1970 return false;
1973 /* This pass will decide on using loop versioning and/or loop peeling in
1974 order to enhance the alignment of data references in the loop. */
1975 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1976 if (!ok)
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1980 "bad data alignment.\n");
1981 return false;
1984 if (slp)
1986 /* Analyze operations in the SLP instances. Note this may
1987 remove unsupported SLP instances which makes the above
1988 SLP kind detection invalid. */
1989 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1990 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1991 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1992 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1993 goto again;
1996 /* Scan all the remaining operations in the loop that are not subject
1997 to SLP and make sure they are vectorizable. */
1998 ok = vect_analyze_loop_operations (loop_vinfo);
1999 if (!ok)
2001 if (dump_enabled_p ())
2002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2003 "bad operation or unsupported loop bound.\n");
2004 return false;
2007 /* Analyze cost. Decide if it is worthwhile to vectorize. */
2008 int min_profitable_estimate, min_profitable_iters;
2009 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2010 &min_profitable_estimate);
2012 if (min_profitable_iters < 0)
2014 if (dump_enabled_p ())
2015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2016 "not vectorized: vectorization not profitable.\n");
2017 if (dump_enabled_p ())
2018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2019 "not vectorized: vector version will never be "
2020 "profitable.\n");
2021 goto again;
2024 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2025 * vectorization_factor) - 1);
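/* Illustration with hypothetical values: --param min-vect-loop-bound=2 and a
   vectorization factor of 4 give min_scalar_loop_bound = 2 * 4 - 1 = 7, so a
   loop with a known trip count of 7 or fewer iterations will be rejected by
   the threshold check below.  */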
2027 /* Use the cost model only if it is more conservative than user specified
2028 threshold. */
2029 th = (unsigned) min_scalar_loop_bound;
2030 if (min_profitable_iters
2031 && (!min_scalar_loop_bound
2032 || min_profitable_iters > min_scalar_loop_bound))
2033 th = (unsigned) min_profitable_iters;
2035 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2037 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2038 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2040 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 "not vectorized: vectorization not profitable.\n");
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_NOTE, vect_location,
2045 "not vectorized: iteration count smaller than user "
2046 "specified loop bound parameter or minimum profitable "
2047 "iterations (whichever is more conservative).\n");
2048 goto again;
2051 estimated_niter
2052 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2053 if (estimated_niter != -1
2054 && ((unsigned HOST_WIDE_INT) estimated_niter
2055 <= MAX (th, (unsigned)min_profitable_estimate)))
2057 if (dump_enabled_p ())
2058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059 "not vectorized: estimated iteration count too "
2060 "small.\n");
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_NOTE, vect_location,
2063 "not vectorized: estimated iteration count smaller "
2064 "than specified loop bound parameter or minimum "
2065 "profitable iterations (whichever is more "
2066 "conservative).\n");
2067 goto again;
2070 /* Decide whether we need to create an epilogue loop to handle
2071 remaining scalar iterations. */
2072 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2073 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2074 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2076 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2077 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2079 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2080 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2081 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2082 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2084 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2085 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2086 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2087 /* In case of versioning, check if the maximum number of
2088 iterations is greater than th. If they are identical,
2089 the epilogue is unnecessary. */
2090 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
2091 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2092 || (unsigned HOST_WIDE_INT) max_niter > th)))
2093 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2095 /* If an epilogue loop is required make sure we can create one. */
2096 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2097 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2099 if (dump_enabled_p ())
2100 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2101 if (!vect_can_advance_ivs_p (loop_vinfo)
2102 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2103 single_exit (LOOP_VINFO_LOOP
2104 (loop_vinfo))))
2106 if (dump_enabled_p ())
2107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2108 "not vectorized: can't create required "
2109 "epilog loop\n");
2110 goto again;
2114 gcc_assert (vectorization_factor
2115 == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2117 /* Ok to vectorize! */
2118 return true;
2120 again:
2121 /* Try again with SLP forced off but if we didn't do any SLP there is
2122 no point in re-trying. */
2123 if (!slp)
2124 return false;
2126 /* Likewise if the grouped loads or stores in the SLP cannot be handled
2127 via interleaving or lane instructions or if there were any SLP
2128 reductions. */
2129 slp_instance instance;
2130 slp_tree node;
2131 unsigned i, j;
2132 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2134 stmt_vec_info vinfo;
2135 vinfo = vinfo_for_stmt
2136 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2137 if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2138 return false;
2139 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2140 unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2141 tree vectype = STMT_VINFO_VECTYPE (vinfo);
2142 if (! vect_store_lanes_supported (vectype, size)
2143 && ! vect_grouped_store_supported (vectype, size))
2144 return false;
2145 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2147 vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2148 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2149 size = STMT_VINFO_GROUP_SIZE (vinfo);
2150 vectype = STMT_VINFO_VECTYPE (vinfo);
2151 if (! vect_load_lanes_supported (vectype, size)
2152 && ! vect_grouped_load_supported (vectype, size))
2153 return false;
2157 if (dump_enabled_p ())
2158 dump_printf_loc (MSG_NOTE, vect_location,
2159 "re-trying with SLP disabled\n");
2161 /* Roll back state appropriately. No SLP this time. */
2162 slp = false;
2163 /* Restore the vectorization factor as it was without SLP. */
2164 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2165 /* Free the SLP instances. */
2166 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2167 vect_free_slp_instance (instance);
2168 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2169 /* Reset SLP type to loop_vect on all stmts. */
2170 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2172 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2173 for (gimple_stmt_iterator si = gsi_start_bb (bb);
2174 !gsi_end_p (si); gsi_next (&si))
2176 stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2177 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2179 gcc_assert (STMT_SLP_TYPE (stmt_info) == loop_vect);
2180 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2181 for (gimple_stmt_iterator pi
2182 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2183 !gsi_end_p (pi); gsi_next (&pi))
2185 gimple *pstmt = gsi_stmt (pi);
2186 STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2189 STMT_SLP_TYPE (stmt_info) = loop_vect;
2192 /* Free optimized alias test DDRS. */
2193 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2194 /* Reset target cost data. */
2195 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2196 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2197 = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2198 /* Reset assorted flags. */
2199 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2200 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2201 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2203 goto start_over;
2206 /* Function vect_analyze_loop.
2208 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2209 for it. The different analyses will record information in the
2210 loop_vec_info struct. */
2211 loop_vec_info
2212 vect_analyze_loop (struct loop *loop)
2214 loop_vec_info loop_vinfo;
2215 unsigned int vector_sizes;
2217 /* Autodetect first vector size we try. */
2218 current_vector_size = 0;
2219 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2221 if (dump_enabled_p ())
2222 dump_printf_loc (MSG_NOTE, vect_location,
2223 "===== analyze_loop_nest =====\n");
2225 if (loop_outer (loop)
2226 && loop_vec_info_for_loop (loop_outer (loop))
2227 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2229 if (dump_enabled_p ())
2230 dump_printf_loc (MSG_NOTE, vect_location,
2231 "outer-loop already vectorized.\n");
2232 return NULL;
2235 while (1)
2237 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2238 loop_vinfo = vect_analyze_loop_form (loop);
2239 if (!loop_vinfo)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "bad loop form.\n");
2244 return NULL;
2247 bool fatal = false;
2248 if (vect_analyze_loop_2 (loop_vinfo, fatal))
2250 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2252 return loop_vinfo;
2255 destroy_loop_vec_info (loop_vinfo, true);
2257 vector_sizes &= ~current_vector_size;
2258 if (fatal
2259 || vector_sizes == 0
2260 || current_vector_size == 0)
2261 return NULL;
2263 /* Try the largest of the remaining vector sizes. */
2264 current_vector_size = 1 << floor_log2 (vector_sizes);
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_NOTE, vect_location,
2267 "***** Re-trying analysis with "
2268 "vector size %d\n", current_vector_size);
2273 /* Function reduction_code_for_scalar_code
2275 Input:
2276 CODE - tree_code of a reduction operation.
2278 Output:
2279 REDUC_CODE - the corresponding tree-code to be used to reduce the
2280 vector of partial results into a single scalar result, or ERROR_MARK
2281 if the operation is a supported reduction operation, but does not have
2282 such a tree-code.
2284 Return FALSE if CODE currently cannot be vectorized as a reduction. */
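/* For example, a PLUS_EXPR sum reduction maps to REDUC_PLUS_EXPR for the
   epilogue, whereas MULT_EXPR is accepted as a reduction but has no single
   reduction tree code, so ERROR_MARK is returned and the epilogue is instead
   generated with whole-vector shifts or element extracts (see
   vect_model_reduction_cost below).  */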
2286 static bool
2287 reduction_code_for_scalar_code (enum tree_code code,
2288 enum tree_code *reduc_code)
2290 switch (code)
2292 case MAX_EXPR:
2293 *reduc_code = REDUC_MAX_EXPR;
2294 return true;
2296 case MIN_EXPR:
2297 *reduc_code = REDUC_MIN_EXPR;
2298 return true;
2300 case PLUS_EXPR:
2301 *reduc_code = REDUC_PLUS_EXPR;
2302 return true;
2304 case MULT_EXPR:
2305 case MINUS_EXPR:
2306 case BIT_IOR_EXPR:
2307 case BIT_XOR_EXPR:
2308 case BIT_AND_EXPR:
2309 *reduc_code = ERROR_MARK;
2310 return true;
2312 default:
2313 return false;
2318 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2319 STMT is printed with a message MSG. */
2321 static void
2322 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2324 dump_printf_loc (msg_type, vect_location, "%s", msg);
2325 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2326 dump_printf (msg_type, "\n");
2330 /* Detect SLP reduction of the form:
2332 #a1 = phi <a5, a0>
2333 a2 = operation (a1)
2334 a3 = operation (a2)
2335 a4 = operation (a3)
2336 a5 = operation (a4)
2338 #a = phi <a5>
2340 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2341 FIRST_STMT is the first reduction stmt in the chain
2342 (a2 = operation (a1)).
2344 Return TRUE if a reduction chain was detected. */
2346 static bool
2347 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2348 gimple *first_stmt)
2350 struct loop *loop = (gimple_bb (phi))->loop_father;
2351 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2352 enum tree_code code;
2353 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2354 stmt_vec_info use_stmt_info, current_stmt_info;
2355 tree lhs;
2356 imm_use_iterator imm_iter;
2357 use_operand_p use_p;
2358 int nloop_uses, size = 0, n_out_of_loop_uses;
2359 bool found = false;
2361 if (loop != vect_loop)
2362 return false;
2364 lhs = PHI_RESULT (phi);
2365 code = gimple_assign_rhs_code (first_stmt);
2366 while (1)
2368 nloop_uses = 0;
2369 n_out_of_loop_uses = 0;
2370 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2372 gimple *use_stmt = USE_STMT (use_p);
2373 if (is_gimple_debug (use_stmt))
2374 continue;
2376 /* Check if we got back to the reduction phi. */
2377 if (use_stmt == phi)
2379 loop_use_stmt = use_stmt;
2380 found = true;
2381 break;
2384 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2386 loop_use_stmt = use_stmt;
2387 nloop_uses++;
2389 else
2390 n_out_of_loop_uses++;
2392 /* There can be either a single use in the loop or two uses in
2393 phi nodes. */
2394 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2395 return false;
2398 if (found)
2399 break;
2401 /* We reached a statement with no loop uses. */
2402 if (nloop_uses == 0)
2403 return false;
2405 /* This is a loop exit phi, and we haven't reached the reduction phi. */
2406 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2407 return false;
2409 if (!is_gimple_assign (loop_use_stmt)
2410 || code != gimple_assign_rhs_code (loop_use_stmt)
2411 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2412 return false;
2414 /* Insert USE_STMT into reduction chain. */
2415 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2416 if (current_stmt)
2418 current_stmt_info = vinfo_for_stmt (current_stmt);
2419 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2420 GROUP_FIRST_ELEMENT (use_stmt_info)
2421 = GROUP_FIRST_ELEMENT (current_stmt_info);
2423 else
2424 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2426 lhs = gimple_assign_lhs (loop_use_stmt);
2427 current_stmt = loop_use_stmt;
2428 size++;
2431 if (!found || loop_use_stmt != phi || size < 2)
2432 return false;
2434 /* Swap the operands, if needed, to make the reduction operand be the second
2435 operand. */
2436 lhs = PHI_RESULT (phi);
2437 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2438 while (next_stmt)
2440 if (gimple_assign_rhs2 (next_stmt) == lhs)
2442 tree op = gimple_assign_rhs1 (next_stmt);
2443 gimple *def_stmt = NULL;
2445 if (TREE_CODE (op) == SSA_NAME)
2446 def_stmt = SSA_NAME_DEF_STMT (op);
2448 /* Check that the other def is either defined in the loop
2449 ("vect_internal_def"), or it's an induction (defined by a
2450 loop-header phi-node). */
2451 if (def_stmt
2452 && gimple_bb (def_stmt)
2453 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2454 && (is_gimple_assign (def_stmt)
2455 || is_gimple_call (def_stmt)
2456 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2457 == vect_induction_def
2458 || (gimple_code (def_stmt) == GIMPLE_PHI
2459 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2460 == vect_internal_def
2461 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2463 lhs = gimple_assign_lhs (next_stmt);
2464 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2465 continue;
2468 return false;
2470 else
2472 tree op = gimple_assign_rhs2 (next_stmt);
2473 gimple *def_stmt = NULL;
2475 if (TREE_CODE (op) == SSA_NAME)
2476 def_stmt = SSA_NAME_DEF_STMT (op);
2478 /* Check that the other def is either defined in the loop
2479 ("vect_internal_def"), or it's an induction (defined by a
2480 loop-header phi-node). */
2481 if (def_stmt
2482 && gimple_bb (def_stmt)
2483 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2484 && (is_gimple_assign (def_stmt)
2485 || is_gimple_call (def_stmt)
2486 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2487 == vect_induction_def
2488 || (gimple_code (def_stmt) == GIMPLE_PHI
2489 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2490 == vect_internal_def
2491 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2493 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2496 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2497 dump_printf (MSG_NOTE, "\n");
2500 swap_ssa_operands (next_stmt,
2501 gimple_assign_rhs1_ptr (next_stmt),
2502 gimple_assign_rhs2_ptr (next_stmt));
2503 update_stmt (next_stmt);
2505 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2506 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2508 else
2509 return false;
2512 lhs = gimple_assign_lhs (next_stmt);
2513 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2516 /* Save the chain for further analysis in SLP detection. */
2517 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2518 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2519 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2521 return true;
2525 /* Function vect_is_simple_reduction
2527 (1) Detect a cross-iteration def-use cycle that represents a simple
2528 reduction computation. We look for the following pattern:
2530 loop_header:
2531 a1 = phi < a0, a2 >
2532 a3 = ...
2533 a2 = operation (a3, a1)
2537 a3 = ...
2538 loop_header:
2539 a1 = phi < a0, a2 >
2540 a2 = operation (a3, a1)
2542 such that:
2543 1. operation is commutative and associative and it is safe to
2544 change the order of the computation (if CHECK_REDUCTION is true)
2545 2. no uses for a2 in the loop (a2 is used out of the loop)
2546 3. no uses of a1 in the loop besides the reduction operation
2547 4. no uses of a1 outside the loop.
2549 Conditions 1,4 are tested here.
2550 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2552 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2553 nested cycles, if CHECK_REDUCTION is false.
2555 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2556 reductions:
2558 a1 = phi < a0, a2 >
2559 inner loop (def of a3)
2560 a2 = phi < a3 >
2562 (4) Detect condition expressions, i.e.:
2563 for (int i = 0; i < N; i++)
2564 if (a[i] < val)
2565 ret_val = a[i];
2569 static gimple *
2570 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2571 bool check_reduction, bool *double_reduc,
2572 bool need_wrapping_integral_overflow,
2573 enum vect_reduction_type *v_reduc_type)
2575 struct loop *loop = (gimple_bb (phi))->loop_father;
2576 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2577 edge latch_e = loop_latch_edge (loop);
2578 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2579 gimple *def_stmt, *def1 = NULL, *def2 = NULL;
2580 enum tree_code orig_code, code;
2581 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2582 tree type;
2583 int nloop_uses;
2584 tree name;
2585 imm_use_iterator imm_iter;
2586 use_operand_p use_p;
2587 bool phi_def;
2589 *double_reduc = false;
2590 *v_reduc_type = TREE_CODE_REDUCTION;
2592 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2593 otherwise, we assume outer loop vectorization. */
2594 gcc_assert ((check_reduction && loop == vect_loop)
2595 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2597 name = PHI_RESULT (phi);
2598 /* ??? If there are no uses of the PHI result the inner loop reduction
2599 won't be detected as possibly double-reduction by vectorizable_reduction
2600 because that tries to walk the PHI arg from the preheader edge which
2601 can be constant. See PR60382. */
2602 if (has_zero_uses (name))
2603 return NULL;
2604 nloop_uses = 0;
2605 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2607 gimple *use_stmt = USE_STMT (use_p);
2608 if (is_gimple_debug (use_stmt))
2609 continue;
2611 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2613 if (dump_enabled_p ())
2614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2615 "intermediate value used outside loop.\n");
2617 return NULL;
2620 nloop_uses++;
2621 if (nloop_uses > 1)
2623 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2625 "reduction used in loop.\n");
2626 return NULL;
2630 if (TREE_CODE (loop_arg) != SSA_NAME)
2632 if (dump_enabled_p ())
2634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2635 "reduction: not ssa_name: ");
2636 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2637 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2639 return NULL;
2642 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2643 if (!def_stmt)
2645 if (dump_enabled_p ())
2646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2647 "reduction: no def_stmt.\n");
2648 return NULL;
2651 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2653 if (dump_enabled_p ())
2655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2656 dump_printf (MSG_NOTE, "\n");
2658 return NULL;
2661 if (is_gimple_assign (def_stmt))
2663 name = gimple_assign_lhs (def_stmt);
2664 phi_def = false;
2666 else
2668 name = PHI_RESULT (def_stmt);
2669 phi_def = true;
2672 nloop_uses = 0;
2673 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2675 gimple *use_stmt = USE_STMT (use_p);
2676 if (is_gimple_debug (use_stmt))
2677 continue;
2678 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2679 nloop_uses++;
2680 if (nloop_uses > 1)
2682 if (dump_enabled_p ())
2683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2684 "reduction used in loop.\n");
2685 return NULL;
2689 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2690 defined in the inner loop. */
2691 if (phi_def)
2693 op1 = PHI_ARG_DEF (def_stmt, 0);
2695 if (gimple_phi_num_args (def_stmt) != 1
2696 || TREE_CODE (op1) != SSA_NAME)
2698 if (dump_enabled_p ())
2699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2700 "unsupported phi node definition.\n");
2702 return NULL;
2705 def1 = SSA_NAME_DEF_STMT (op1);
2706 if (gimple_bb (def1)
2707 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2708 && loop->inner
2709 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2710 && is_gimple_assign (def1))
2712 if (dump_enabled_p ())
2713 report_vect_op (MSG_NOTE, def_stmt,
2714 "detected double reduction: ");
2716 *double_reduc = true;
2717 return def_stmt;
2720 return NULL;
2723 code = orig_code = gimple_assign_rhs_code (def_stmt);
2725 /* We can handle "res -= x[i]", which is non-associative by
2726 simply rewriting this into "res += -x[i]". Avoid changing
2727 the gimple instruction for the first simple tests and only do this
2728 if we're allowed to change code at all. */
2729 if (code == MINUS_EXPR
2730 && (op1 = gimple_assign_rhs1 (def_stmt))
2731 && TREE_CODE (op1) == SSA_NAME
2732 && SSA_NAME_DEF_STMT (op1) == phi)
2733 code = PLUS_EXPR;
2735 if (check_reduction)
2737 if (code == COND_EXPR)
2738 *v_reduc_type = COND_REDUCTION;
2739 else if (!commutative_tree_code (code) || !associative_tree_code (code))
2741 if (dump_enabled_p ())
2742 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2743 "reduction: not commutative/associative: ");
2744 return NULL;
2748 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2750 if (code != COND_EXPR)
2752 if (dump_enabled_p ())
2753 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2754 "reduction: not binary operation: ");
2756 return NULL;
2759 op3 = gimple_assign_rhs1 (def_stmt);
2760 if (COMPARISON_CLASS_P (op3))
2762 op4 = TREE_OPERAND (op3, 1);
2763 op3 = TREE_OPERAND (op3, 0);
2766 op1 = gimple_assign_rhs2 (def_stmt);
2767 op2 = gimple_assign_rhs3 (def_stmt);
2769 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2771 if (dump_enabled_p ())
2772 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2773 "reduction: uses not ssa_names: ");
2775 return NULL;
2778 else
2780 op1 = gimple_assign_rhs1 (def_stmt);
2781 op2 = gimple_assign_rhs2 (def_stmt);
2783 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2785 if (dump_enabled_p ())
2786 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2787 "reduction: uses not ssa_names: ");
2789 return NULL;
2793 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2794 if ((TREE_CODE (op1) == SSA_NAME
2795 && !types_compatible_p (type, TREE_TYPE (op1)))
2796 || (TREE_CODE (op2) == SSA_NAME
2797 && !types_compatible_p (type, TREE_TYPE (op2)))
2798 || (op3 && TREE_CODE (op3) == SSA_NAME
2799 && !types_compatible_p (type, TREE_TYPE (op3)))
2800 || (op4 && TREE_CODE (op4) == SSA_NAME
2801 && !types_compatible_p (type, TREE_TYPE (op4))))
2803 if (dump_enabled_p ())
2805 dump_printf_loc (MSG_NOTE, vect_location,
2806 "reduction: multiple types: operation type: ");
2807 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2808 dump_printf (MSG_NOTE, ", operands types: ");
2809 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2810 TREE_TYPE (op1));
2811 dump_printf (MSG_NOTE, ",");
2812 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2813 TREE_TYPE (op2));
2814 if (op3)
2816 dump_printf (MSG_NOTE, ",");
2817 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2818 TREE_TYPE (op3));
2821 if (op4)
2823 dump_printf (MSG_NOTE, ",");
2824 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2825 TREE_TYPE (op4));
2827 dump_printf (MSG_NOTE, "\n");
2830 return NULL;
2833 /* Check that it's ok to change the order of the computation.
2834 Generally, when vectorizing a reduction we change the order of the
2835 computation. This may change the behavior of the program in some
2836 cases, so we need to check that this is ok. One exception is when
2837 vectorizing an outer-loop: the inner-loop is executed sequentially,
2838 and therefore vectorizing reductions in the inner-loop during
2839 outer-loop vectorization is safe. */
2841 if (*v_reduc_type != COND_REDUCTION)
2843 /* CHECKME: check for !flag_finite_math_only too? */
2844 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2845 && check_reduction)
2847 /* Changing the order of operations changes the semantics. */
2848 if (dump_enabled_p ())
2849 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2850 "reduction: unsafe fp math optimization: ");
2851 return NULL;
2853 else if (INTEGRAL_TYPE_P (type) && check_reduction)
2855 if (!operation_no_trapping_overflow (type, code))
2857 /* Changing the order of operations changes the semantics. */
2858 if (dump_enabled_p ())
2859 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2860 "reduction: unsafe int math optimization"
2861 " (overflow traps): ");
2862 return NULL;
2864 if (need_wrapping_integral_overflow
2865 && !TYPE_OVERFLOW_WRAPS (type)
2866 && operation_can_overflow (code))
2868 /* Changing the order of operations changes the semantics. */
2869 if (dump_enabled_p ())
2870 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2871 "reduction: unsafe int math optimization"
2872 " (overflow doesn't wrap): ");
2873 return NULL;
2876 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2878 /* Changing the order of operations changes the semantics. */
2879 if (dump_enabled_p ())
2880 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2881 "reduction: unsafe fixed-point math optimization: ");
2882 return NULL;
2886 /* Reduction is safe. We're dealing with one of the following:
2887 1) integer arithmetic and no trapv
2888 2) floating point arithmetic, and special flags permit this optimization
2889 3) nested cycle (i.e., outer loop vectorization). */
2890 if (TREE_CODE (op1) == SSA_NAME)
2891 def1 = SSA_NAME_DEF_STMT (op1);
2893 if (TREE_CODE (op2) == SSA_NAME)
2894 def2 = SSA_NAME_DEF_STMT (op2);
2896 if (code != COND_EXPR
2897 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2899 if (dump_enabled_p ())
2900 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2901 return NULL;
2904 /* Check that one def is the reduction def, defined by PHI,
2905 the other def is either defined in the loop ("vect_internal_def"),
2906 or it's an induction (defined by a loop-header phi-node). */
2908 if (def2 && def2 == phi
2909 && (code == COND_EXPR
2910 || !def1 || gimple_nop_p (def1)
2911 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2912 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2913 && (is_gimple_assign (def1)
2914 || is_gimple_call (def1)
2915 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2916 == vect_induction_def
2917 || (gimple_code (def1) == GIMPLE_PHI
2918 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2919 == vect_internal_def
2920 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2922 if (dump_enabled_p ())
2923 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2924 return def_stmt;
2927 if (def1 && def1 == phi
2928 && (code == COND_EXPR
2929 || !def2 || gimple_nop_p (def2)
2930 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2931 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2932 && (is_gimple_assign (def2)
2933 || is_gimple_call (def2)
2934 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2935 == vect_induction_def
2936 || (gimple_code (def2) == GIMPLE_PHI
2937 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2938 == vect_internal_def
2939 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2941 if (check_reduction
2942 && orig_code != MINUS_EXPR)
2944 if (code == COND_EXPR)
2946 /* No current known use where this case would be useful. */
2947 if (dump_enabled_p ())
2948 report_vect_op (MSG_NOTE, def_stmt,
2949 "detected reduction: cannot currently swap "
2950 "operands for cond_expr");
2951 return NULL;
2954 /* Swap operands (just for simplicity - so that the rest of the code
2955 can assume that the reduction variable is always the last (second)
2956 argument). */
2957 if (dump_enabled_p ())
2958 report_vect_op (MSG_NOTE, def_stmt,
2959 "detected reduction: need to swap operands: ");
2961 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2962 gimple_assign_rhs2_ptr (def_stmt));
2964 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2965 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2967 else
2969 if (dump_enabled_p ())
2970 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2973 return def_stmt;
2976 /* Try to find SLP reduction chain. */
2977 if (check_reduction && code != COND_EXPR
2978 && vect_is_slp_reduction (loop_info, phi, def_stmt))
2980 if (dump_enabled_p ())
2981 report_vect_op (MSG_NOTE, def_stmt,
2982 "reduction: detected reduction chain: ");
2984 return def_stmt;
2987 if (dump_enabled_p ())
2988 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2989 "reduction: unknown pattern: ");
2991 return NULL;
2994 /* Wrapper around vect_is_simple_reduction, which will modify code
2995 in-place if it enables detection of more reductions. The arguments
2996 are the same as for that function. */
2998 gimple *
2999 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3000 bool check_reduction, bool *double_reduc,
3001 bool need_wrapping_integral_overflow)
3003 enum vect_reduction_type v_reduc_type;
3004 return vect_is_simple_reduction (loop_info, phi, check_reduction,
3005 double_reduc,
3006 need_wrapping_integral_overflow,
3007 &v_reduc_type);
3010 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
3011 int
3012 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3013 int *peel_iters_epilogue,
3014 stmt_vector_for_cost *scalar_cost_vec,
3015 stmt_vector_for_cost *prologue_cost_vec,
3016 stmt_vector_for_cost *epilogue_cost_vec)
3018 int retval = 0;
3019 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3021 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3023 *peel_iters_epilogue = vf/2;
3024 if (dump_enabled_p ())
3025 dump_printf_loc (MSG_NOTE, vect_location,
3026 "cost model: epilogue peel iters set to vf/2 "
3027 "because loop iterations are unknown .\n");
3029 /* If peeled iterations are known but the number of scalar loop
3030 iterations is unknown, count a taken branch per peeled loop. */
3031 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3032 NULL, 0, vect_prologue);
3033 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3034 NULL, 0, vect_epilogue);
3036 else
3038 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3039 peel_iters_prologue = niters < peel_iters_prologue ?
3040 niters : peel_iters_prologue;
3041 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
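/* Illustration with hypothetical values: 100 known iterations, a prologue
   peel of 3 and vf == 8 give an epilogue peel of (100 - 3) % 8 == 1
   iteration.  */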
3042 /* If we need to peel for gaps, but no peeling is required, we have to
3043 peel VF iterations. */
3044 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3045 *peel_iters_epilogue = vf;
3048 stmt_info_for_cost *si;
3049 int j;
3050 if (peel_iters_prologue)
3051 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3052 retval += record_stmt_cost (prologue_cost_vec,
3053 si->count * peel_iters_prologue,
3054 si->kind, NULL, si->misalign,
3055 vect_prologue);
3056 if (*peel_iters_epilogue)
3057 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3058 retval += record_stmt_cost (epilogue_cost_vec,
3059 si->count * *peel_iters_epilogue,
3060 si->kind, NULL, si->misalign,
3061 vect_epilogue);
3063 return retval;
3066 /* Function vect_estimate_min_profitable_iters
3068 Return the number of iterations required for the vector version of the
3069 loop to be profitable relative to the cost of the scalar version of the
3070 loop. */
3072 static void
3073 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3074 int *ret_min_profitable_niters,
3075 int *ret_min_profitable_estimate)
3077 int min_profitable_iters;
3078 int min_profitable_estimate;
3079 int peel_iters_prologue;
3080 int peel_iters_epilogue;
3081 unsigned vec_inside_cost = 0;
3082 int vec_outside_cost = 0;
3083 unsigned vec_prologue_cost = 0;
3084 unsigned vec_epilogue_cost = 0;
3085 int scalar_single_iter_cost = 0;
3086 int scalar_outside_cost = 0;
3087 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3088 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3089 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3091 /* Cost model disabled. */
3092 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3094 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3095 *ret_min_profitable_niters = 0;
3096 *ret_min_profitable_estimate = 0;
3097 return;
3100 /* Requires loop versioning tests to handle misalignment. */
3101 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3103 /* FIXME: Make cost depend on complexity of individual check. */
3104 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3105 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3106 vect_prologue);
3107 dump_printf (MSG_NOTE,
3108 "cost model: Adding cost of checks for loop "
3109 "versioning to treat misalignment.\n");
3112 /* Requires loop versioning with alias checks. */
3113 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3115 /* FIXME: Make cost depend on complexity of individual check. */
3116 unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3117 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3118 vect_prologue);
3119 dump_printf (MSG_NOTE,
3120 "cost model: Adding cost of checks for loop "
3121 "versioning aliasing.\n");
3124 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3125 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3126 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3127 vect_prologue);
3129 /* Count statements in scalar loop. Using this as scalar cost for a single
3130 iteration for now.
3132 TODO: Add outer loop support.
3134 TODO: Consider assigning different costs to different scalar
3135 statements. */
3137 scalar_single_iter_cost
3138 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3140 /* Add additional cost for the peeled instructions in prologue and epilogue
3141 loop.
3143 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3144 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3146 TODO: Build an expression that represents peel_iters for prologue and
3147 epilogue to be used in a run-time test. */
3149 if (npeel < 0)
3151 peel_iters_prologue = vf/2;
3152 dump_printf (MSG_NOTE, "cost model: "
3153 "prologue peel iters set to vf/2.\n");
3155 /* If peeling for alignment is unknown, loop bound of main loop becomes
3156 unknown. */
3157 peel_iters_epilogue = vf/2;
3158 dump_printf (MSG_NOTE, "cost model: "
3159 "epilogue peel iters set to vf/2 because "
3160 "peeling for alignment is unknown.\n");
3162 /* If peeled iterations are unknown, count a taken branch and a not taken
3163 branch per peeled loop. Even if scalar loop iterations are known,
3164 vector iterations are not known since peeled prologue iterations are
3165 not known. Hence guards remain the same. */
3166 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3167 NULL, 0, vect_prologue);
3168 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3169 NULL, 0, vect_prologue);
3170 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3171 NULL, 0, vect_epilogue);
3172 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3173 NULL, 0, vect_epilogue);
3174 stmt_info_for_cost *si;
3175 int j;
3176 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3178 struct _stmt_vec_info *stmt_info
3179 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3180 (void) add_stmt_cost (target_cost_data,
3181 si->count * peel_iters_prologue,
3182 si->kind, stmt_info, si->misalign,
3183 vect_prologue);
3184 (void) add_stmt_cost (target_cost_data,
3185 si->count * peel_iters_epilogue,
3186 si->kind, stmt_info, si->misalign,
3187 vect_epilogue);
3190 else
3192 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3193 stmt_info_for_cost *si;
3194 int j;
3195 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3197 prologue_cost_vec.create (2);
3198 epilogue_cost_vec.create (2);
3199 peel_iters_prologue = npeel;
3201 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3202 &peel_iters_epilogue,
3203 &LOOP_VINFO_SCALAR_ITERATION_COST
3204 (loop_vinfo),
3205 &prologue_cost_vec,
3206 &epilogue_cost_vec);
3208 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3210 struct _stmt_vec_info *stmt_info
3211 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3212 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3213 si->misalign, vect_prologue);
3216 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3218 struct _stmt_vec_info *stmt_info
3219 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3220 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3221 si->misalign, vect_epilogue);
3224 prologue_cost_vec.release ();
3225 epilogue_cost_vec.release ();
3228 /* FORNOW: The scalar outside cost is incremented in one of the
3229 following ways:
3231 1. The vectorizer checks for alignment and aliasing and generates
3232 a condition that allows dynamic vectorization. A cost model
3233 check is ANDED with the versioning condition. Hence scalar code
3234 path now has the added cost of the versioning check.
3236 if (cost > th & versioning_check)
3237 jmp to vector code
3239 Hence run-time scalar is incremented by not-taken branch cost.
3241 2. The vectorizer then checks if a prologue is required. If the
3242 cost model check was not done before during versioning, it has to
3243 be done before the prologue check.
3245 if (cost <= th)
3246 prologue = scalar_iters
3247 if (prologue == 0)
3248 jmp to vector code
3249 else
3250 execute prologue
3251 if (prologue == num_iters)
3252 go to exit
3254 Hence the run-time scalar cost is incremented by a taken branch,
3255 plus a not-taken branch, plus a taken branch cost.
3257 3. The vectorizer then checks if an epilogue is required. If the
3258 cost model check was not done before during prologue check, it
3259 has to be done with the epilogue check.
3261 if (prologue == 0)
3262 jmp to vector code
3263 else
3264 execute prologue
3265 if (prologue == num_iters)
3266 go to exit
3267 vector code:
3268 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3269 jmp to epilogue
3271 Hence the run-time scalar cost should be incremented by 2 taken
3272 branches.
3274 TODO: The back end may reorder the BBS's differently and reverse
3275 conditions/branch directions. Change the estimates below to
3276 something more reasonable. */
3278 /* If the number of iterations is known and we do not do versioning, we can
3279 decide whether to vectorize at compile time. Hence the scalar version
3280 does not carry cost model guard costs. */
3281 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3282 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3283 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3285 /* Cost model check occurs at versioning. */
3286 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3287 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3288 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3289 else
3291 /* Cost model check occurs at prologue generation. */
3292 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3293 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3294 + vect_get_stmt_cost (cond_branch_not_taken);
3295 /* Cost model check occurs at epilogue generation. */
3296 else
3297 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3301 /* Complete the target-specific cost calculations. */
3302 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3303 &vec_inside_cost, &vec_epilogue_cost);
3305 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3307 if (dump_enabled_p ())
3309 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3310 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3311 vec_inside_cost);
3312 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3313 vec_prologue_cost);
3314 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3315 vec_epilogue_cost);
3316 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3317 scalar_single_iter_cost);
3318 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3319 scalar_outside_cost);
3320 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3321 vec_outside_cost);
3322 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3323 peel_iters_prologue);
3324 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3325 peel_iters_epilogue);
3328 /* Calculate number of iterations required to make the vector version
3329 profitable, relative to the loop bodies only. The following condition
3330 must hold true:
3331 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3332 where
3333 SIC = scalar iteration cost, VIC = vector iteration cost,
3334 VOC = vector outside cost, VF = vectorization factor,
3335 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3336 SOC = scalar outside cost for run time cost model check. */
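/* Worked illustration with hypothetical costs: SIC = 4, VIC = 8, VF = 4,
   VOC = 20, SOC = 0 and no peeling.  The condition
   4 * niters > 8 * (niters / 4) + 20 requires niters > 10, and the
   computation below indeed produces an initial min_profitable_iters of 11
   before the later adjustments.  */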
3338 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3340 if (vec_outside_cost <= 0)
3341 min_profitable_iters = 1;
3342 else
3344 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3345 - vec_inside_cost * peel_iters_prologue
3346 - vec_inside_cost * peel_iters_epilogue)
3347 / ((scalar_single_iter_cost * vf)
3348 - vec_inside_cost);
3350 if ((scalar_single_iter_cost * vf * min_profitable_iters)
3351 <= (((int) vec_inside_cost * min_profitable_iters)
3352 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3353 min_profitable_iters++;
3356 /* vector version will never be profitable. */
3357 else
3359 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3360 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3361 "did not happen for a simd loop");
3363 if (dump_enabled_p ())
3364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3365 "cost model: the vector iteration cost = %d "
3366 "divided by the scalar iteration cost = %d "
3367 "is greater or equal to the vectorization factor = %d"
3368 ".\n",
3369 vec_inside_cost, scalar_single_iter_cost, vf);
3370 *ret_min_profitable_niters = -1;
3371 *ret_min_profitable_estimate = -1;
3372 return;
3375 dump_printf (MSG_NOTE,
3376 " Calculated minimum iters for profitability: %d\n",
3377 min_profitable_iters);
3379 min_profitable_iters =
3380 min_profitable_iters < vf ? vf : min_profitable_iters;
3382 /* Because the condition we create is:
3383 if (niters <= min_profitable_iters)
3384 then skip the vectorized loop. */
3385 min_profitable_iters--;
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_NOTE, vect_location,
3389 " Runtime profitability threshold = %d\n",
3390 min_profitable_iters);
3392 *ret_min_profitable_niters = min_profitable_iters;
3394 /* Calculate number of iterations required to make the vector version
3395 profitable, relative to the loop bodies only.
3397 The non-vectorized variant costs SIC * niters and it must win over the vector
3398 variant on the expected loop trip count. The following condition must hold true:
3399 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3401 if (vec_outside_cost <= 0)
3402 min_profitable_estimate = 1;
3403 else
3405 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3406 - vec_inside_cost * peel_iters_prologue
3407 - vec_inside_cost * peel_iters_epilogue)
3408 / ((scalar_single_iter_cost * vf)
3409 - vec_inside_cost);
3411 min_profitable_estimate--;
3412 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3413 if (dump_enabled_p ())
3414 dump_printf_loc (MSG_NOTE, vect_location,
3415 " Static estimate profitability threshold = %d\n",
3416 min_profitable_estimate);
3418 *ret_min_profitable_estimate = min_profitable_estimate;
3421 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3422 vector elements (not bits) for a vector of mode MODE. */
3423 static void
3424 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3425 unsigned char *sel)
3427 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3429 for (i = 0; i < nelt; i++)
3430 sel[i] = (i + offset) & (2*nelt - 1);
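/* For example, for an 8-element vector and OFFSET == 3 the mask built above
   is {3, 4, ..., 10}: the last five elements of the first vec_perm operand
   followed by the first three elements of the second operand.  */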
3433 /* Checks whether the target supports whole-vector shifts for vectors of mode
3434 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3435 it supports vec_perm_const with masks for all necessary shift amounts. */
3436 static bool
3437 have_whole_vector_shift (enum machine_mode mode)
3439 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3440 return true;
3442 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3443 return false;
3445 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3446 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
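/* The loop below checks the shift amounts nelt/2, nelt/4, ..., 1; these
   correspond to the repeated halving steps a reduction epilogue performs
   (cf. the exact_log2 (nelements) * 2 estimate in
   vect_model_reduction_cost).  */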
3448 for (i = nelt/2; i >= 1; i/=2)
3450 calc_vec_perm_mask_for_shift (mode, i, sel);
3451 if (!can_vec_perm_p (mode, false, sel))
3452 return false;
3454 return true;
3457 /* Return the reduction operand (with index REDUC_INDEX) of STMT. */
3459 static tree
3460 get_reduction_op (gimple *stmt, int reduc_index)
3462 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3464 case GIMPLE_SINGLE_RHS:
3465 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3466 == ternary_op);
3467 return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3468 case GIMPLE_UNARY_RHS:
3469 return gimple_assign_rhs1 (stmt);
3470 case GIMPLE_BINARY_RHS:
3471 return (reduc_index
3472 ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3473 case GIMPLE_TERNARY_RHS:
3474 return gimple_op (stmt, reduc_index + 1);
3475 default:
3476 gcc_unreachable ();
3480 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3481 functions. Design better to avoid maintenance issues. */
3483 /* Function vect_model_reduction_cost.
3485 Models cost for a reduction operation, including the vector ops
3486 generated within the strip-mine loop, the initial definition before
3487 the loop, and the epilogue code that must be generated. */
3489 static bool
3490 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3491 int ncopies, int reduc_index)
3493 int prologue_cost = 0, epilogue_cost = 0;
3494 enum tree_code code;
3495 optab optab;
3496 tree vectype;
3497 gimple *stmt, *orig_stmt;
3498 tree reduction_op;
3499 machine_mode mode;
3500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3501 struct loop *loop = NULL;
3502 void *target_cost_data;
3504 if (loop_vinfo)
3506 loop = LOOP_VINFO_LOOP (loop_vinfo);
3507 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3509 else
3510 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3512 /* Condition reductions generate two reductions in the loop. */
3513 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3514 ncopies *= 2;
3516 /* Cost of reduction op inside loop. */
3517 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3518 stmt_info, 0, vect_body);
3519 stmt = STMT_VINFO_STMT (stmt_info);
3521 reduction_op = get_reduction_op (stmt, reduc_index);
3523 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3524 if (!vectype)
3526 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "unsupported data-type ");
3530 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3531 TREE_TYPE (reduction_op));
3532 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3534 return false;
3537 mode = TYPE_MODE (vectype);
3538 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3540 if (!orig_stmt)
3541 orig_stmt = STMT_VINFO_STMT (stmt_info);
3543 code = gimple_assign_rhs_code (orig_stmt);
3545 /* Add in cost for initial definition.
3546 For cond reduction we have four vectors: initial index, step, initial
3547 result of the data reduction, initial value of the index reduction. */
3548 int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3549 == COND_REDUCTION ? 4 : 1;
3550 prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3551 scalar_to_vec, stmt_info, 0,
3552 vect_prologue);
3554 /* Determine cost of epilogue code.
3556 We have a reduction operator that will reduce the vector in one statement.
3557 Also requires scalar extract. */
3559 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3561 if (reduc_code != ERROR_MARK)
3563 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3565 /* An EQ stmt and a COND_EXPR stmt. */
3566 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3567 vector_stmt, stmt_info, 0,
3568 vect_epilogue);
3569 /* Reduction of the max index and a reduction of the found
3570 values. */
3571 epilogue_cost += add_stmt_cost (target_cost_data, 2,
3572 vec_to_scalar, stmt_info, 0,
3573 vect_epilogue);
3574 /* A broadcast of the max value. */
3575 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3576 scalar_to_vec, stmt_info, 0,
3577 vect_epilogue);
3579 else
3581 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3582 stmt_info, 0, vect_epilogue);
3583 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3584 vec_to_scalar, stmt_info, 0,
3585 vect_epilogue);
3588 else
3590 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3591 tree bitsize =
3592 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3593 int element_bitsize = tree_to_uhwi (bitsize);
3594 int nelements = vec_size_in_bits / element_bitsize;
3596 optab = optab_for_tree_code (code, vectype, optab_default);
3598 /* We have a whole vector shift available. */
3599 if (VECTOR_MODE_P (mode)
3600 && optab_handler (optab, mode) != CODE_FOR_nothing
3601 && have_whole_vector_shift (mode))
3603 /* Final reduction via vector shifts and the reduction operator.
3604 Also requires scalar extract. */
3605 epilogue_cost += add_stmt_cost (target_cost_data,
3606 exact_log2 (nelements) * 2,
3607 vector_stmt, stmt_info, 0,
3608 vect_epilogue);
3609 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3610 vec_to_scalar, stmt_info, 0,
3611 vect_epilogue);
3613 else
3614 /* Use extracts and reduction op for final reduction. For N
3615 elements, we have N extracts and N-1 reduction ops. */
3616 epilogue_cost += add_stmt_cost (target_cost_data,
3617 nelements + nelements - 1,
3618 vector_stmt, stmt_info, 0,
3619 vect_epilogue);
3623 if (dump_enabled_p ())
3624 dump_printf (MSG_NOTE,
3625 "vect_model_reduction_cost: inside_cost = %d, "
3626 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3627 prologue_cost, epilogue_cost);
3629 return true;
3633 /* Function vect_model_induction_cost.
3635 Models cost for induction operations. */
3637 static void
3638 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3640 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3641 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3642 unsigned inside_cost, prologue_cost;
3644 /* loop cost for vec_loop. */
3645 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3646 stmt_info, 0, vect_body);
3648 /* prologue cost for vec_init and vec_step. */
3649 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3650 stmt_info, 0, vect_prologue);
3652 if (dump_enabled_p ())
3653 dump_printf_loc (MSG_NOTE, vect_location,
3654 "vect_model_induction_cost: inside_cost = %d, "
3655 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3659 /* Function get_initial_def_for_induction
3661 Input:
3662 STMT - a stmt that performs an induction operation in the loop.
3663 IV_PHI - the initial value of the induction variable
3665 Output:
3666 Return a vector variable, initialized with the first VF values of
3667 the induction variable. E.g., for an iv with IV_PHI='X' and
3668 evolution S, for a vector of 4 units, we want to return:
3669 [X, X + S, X + 2*S, X + 3*S]. */
3671 static tree
3672 get_initial_def_for_induction (gimple *iv_phi)
3674 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3675 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3676 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3677 tree vectype;
3678 int nunits;
3679 edge pe = loop_preheader_edge (loop);
3680 struct loop *iv_loop;
3681 basic_block new_bb;
3682 tree new_vec, vec_init, vec_step, t;
3683 tree new_name;
3684 gimple *new_stmt;
3685 gphi *induction_phi;
3686 tree induc_def, vec_def, vec_dest;
3687 tree init_expr, step_expr;
3688 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3689 int i;
3690 int ncopies;
3691 tree expr;
3692 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3693 bool nested_in_vect_loop = false;
3694 gimple_seq stmts;
3695 imm_use_iterator imm_iter;
3696 use_operand_p use_p;
3697 gimple *exit_phi;
3698 edge latch_e;
3699 tree loop_arg;
3700 gimple_stmt_iterator si;
3701 basic_block bb = gimple_bb (iv_phi);
3702 tree stepvectype;
3703 tree resvectype;
3705 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3706 if (nested_in_vect_loop_p (loop, iv_phi))
3708 nested_in_vect_loop = true;
3709 iv_loop = loop->inner;
3711 else
3712 iv_loop = loop;
3713 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3715 latch_e = loop_latch_edge (iv_loop);
3716 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3718 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3719 gcc_assert (step_expr != NULL_TREE);
3721 pe = loop_preheader_edge (iv_loop);
3722 init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3723 loop_preheader_edge (iv_loop));
3725 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3726 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3727 gcc_assert (vectype);
3728 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3729 ncopies = vf / nunits;
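  /* E.g., with a vectorization factor of 8 and a 4-element vectype we need
     ncopies = 2 vector IVs; the extra copy is created further below in the
     "ncopies > 1" unrolling code.  */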
3731 gcc_assert (phi_info);
3732 gcc_assert (ncopies >= 1);
3734 /* Convert the step to the desired type. */
3735 stmts = NULL;
3736 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3737 if (stmts)
3739 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3740 gcc_assert (!new_bb);
3743 /* Find the first insertion point in the BB. */
3744 si = gsi_after_labels (bb);
3746 /* Create the vector that holds the initial_value of the induction. */
3747 if (nested_in_vect_loop)
3749 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3750 been created during vectorization of previous stmts. We obtain it
3751 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3752 vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3753 /* If the initial value is not of proper type, convert it. */
3754 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3756 new_stmt
3757 = gimple_build_assign (vect_get_new_ssa_name (vectype,
3758 vect_simple_var,
3759 "vec_iv_"),
3760 VIEW_CONVERT_EXPR,
3761 build1 (VIEW_CONVERT_EXPR, vectype,
3762 vec_init));
3763 vec_init = gimple_assign_lhs (new_stmt);
3764 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3765 new_stmt);
3766 gcc_assert (!new_bb);
3767 set_vinfo_for_stmt (new_stmt,
3768 new_stmt_vec_info (new_stmt, loop_vinfo));
3771 else
3773 vec<constructor_elt, va_gc> *v;
3775 /* iv_loop is the loop to be vectorized. Create:
3776 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3777 stmts = NULL;
3778 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3780 vec_alloc (v, nunits);
3781 bool constant_p = is_gimple_min_invariant (new_name);
3782 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3783 for (i = 1; i < nunits; i++)
3785 /* Create: new_name_i = new_name + step_expr */
3786 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3787 new_name, step_expr);
3788 if (!is_gimple_min_invariant (new_name))
3789 constant_p = false;
3790 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3792 if (stmts)
3794 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3795 gcc_assert (!new_bb);
3798 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3799 if (constant_p)
3800 new_vec = build_vector_from_ctor (vectype, v);
3801 else
3802 new_vec = build_constructor (vectype, v);
3803 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3807 /* Create the vector that holds the step of the induction. */
3808 if (nested_in_vect_loop)
3809 /* iv_loop is nested in the loop to be vectorized. Generate:
3810 vec_step = [S, S, S, S] */
3811 new_name = step_expr;
3812 else
3814 /* iv_loop is the loop to be vectorized. Generate:
3815 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3816 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3818 expr = build_int_cst (integer_type_node, vf);
3819 expr = fold_convert (TREE_TYPE (step_expr), expr);
3821 else
3822 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3823 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3824 expr, step_expr);
3825 if (TREE_CODE (step_expr) == SSA_NAME)
3826 new_name = vect_init_vector (iv_phi, new_name,
3827 TREE_TYPE (step_expr), NULL);
3830 t = unshare_expr (new_name);
3831 gcc_assert (CONSTANT_CLASS_P (new_name)
3832 || TREE_CODE (new_name) == SSA_NAME);
3833 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3834 gcc_assert (stepvectype);
3835 new_vec = build_vector_from_val (stepvectype, t);
3836 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
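  /* In the non-nested case the step was scaled by VF above because one
     iteration of the vectorized loop covers VF scalar iterations, so the
     vector IV must advance by VF * S; in the nested case the inner-loop IV
     still advances by S per inner-loop iteration, hence vec_step = [S,...].  */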
3839 /* Create the following def-use cycle:
3840 loop prolog:
3841 vec_init = ...
3842 vec_step = ...
3843 loop:
3844 vec_iv = PHI <vec_init, vec_loop>
3846 STMT
3848 vec_loop = vec_iv + vec_step; */
3850 /* Create the induction-phi that defines the induction-operand. */
3851 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3852 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3853 set_vinfo_for_stmt (induction_phi,
3854 new_stmt_vec_info (induction_phi, loop_vinfo));
3855 induc_def = PHI_RESULT (induction_phi);
3857 /* Create the iv update inside the loop */
3858 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3859 vec_def = make_ssa_name (vec_dest, new_stmt);
3860 gimple_assign_set_lhs (new_stmt, vec_def);
3861 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3862 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
3864 /* Set the arguments of the phi node: */
3865 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3866 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3867 UNKNOWN_LOCATION);
3870   /* In case the vectorization factor (VF) is bigger than the number
3871      of elements that we can fit in a vectype (nunits), we have to
3872      generate more than one vector stmt, i.e., we need to "unroll" the
3873      vector stmt by a factor of VF/nunits.  For more details see the
3874      documentation in vectorizable_operation.  */
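  /* E.g., with VF = 8 and 4-element vectors the code below creates a second
     vector IV: the phi above starts at [X, X+S, X+2*S, X+3*S] and steps by
     8*S each iteration, while the extra copy is obtained by adding
     [4*S, 4*S, 4*S, 4*S] (nunits * S) to it.  */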
3876 if (ncopies > 1)
3878 stmt_vec_info prev_stmt_vinfo;
3879 /* FORNOW. This restriction should be relaxed. */
3880 gcc_assert (!nested_in_vect_loop);
3882 /* Create the vector that holds the step of the induction. */
3883 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3885 expr = build_int_cst (integer_type_node, nunits);
3886 expr = fold_convert (TREE_TYPE (step_expr), expr);
3888 else
3889 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3890 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3891 expr, step_expr);
3892 if (TREE_CODE (step_expr) == SSA_NAME)
3893 new_name = vect_init_vector (iv_phi, new_name,
3894 TREE_TYPE (step_expr), NULL);
3895 t = unshare_expr (new_name);
3896 gcc_assert (CONSTANT_CLASS_P (new_name)
3897 || TREE_CODE (new_name) == SSA_NAME);
3898 new_vec = build_vector_from_val (stepvectype, t);
3899 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3901 vec_def = induc_def;
3902 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3903 for (i = 1; i < ncopies; i++)
3905 /* vec_i = vec_prev + vec_step */
3906 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3907 vec_def, vec_step);
3908 vec_def = make_ssa_name (vec_dest, new_stmt);
3909 gimple_assign_set_lhs (new_stmt, vec_def);
3911 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3912 if (!useless_type_conversion_p (resvectype, vectype))
3914 new_stmt
3915 = gimple_build_assign
3916 (vect_get_new_vect_var (resvectype, vect_simple_var,
3917 "vec_iv_"),
3918 VIEW_CONVERT_EXPR,
3919 build1 (VIEW_CONVERT_EXPR, resvectype,
3920 gimple_assign_lhs (new_stmt)));
3921 gimple_assign_set_lhs (new_stmt,
3922 make_ssa_name
3923 (gimple_assign_lhs (new_stmt), new_stmt));
3924 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3926 set_vinfo_for_stmt (new_stmt,
3927 new_stmt_vec_info (new_stmt, loop_vinfo));
3928 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3929 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3933 if (nested_in_vect_loop)
3935 /* Find the loop-closed exit-phi of the induction, and record
3936 the final vector of induction results: */
3937 exit_phi = NULL;
3938 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3940 gimple *use_stmt = USE_STMT (use_p);
3941 if (is_gimple_debug (use_stmt))
3942 continue;
3944 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3946 exit_phi = use_stmt;
3947 break;
3950 if (exit_phi)
3952 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3953	  /* FORNOW.  We do not yet support the case where an inner-loop
3954	     induction is used only outside the outer-loop.  */
3955 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3956 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3958 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3959 if (dump_enabled_p ())
3961 dump_printf_loc (MSG_NOTE, vect_location,
3962 "vector of inductions after inner-loop:");
3963 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3964 dump_printf (MSG_NOTE, "\n");
3970 if (dump_enabled_p ())
3972 dump_printf_loc (MSG_NOTE, vect_location,
3973 "transform induction: created def-use cycle: ");
3974 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3975 dump_printf (MSG_NOTE, "\n");
3976 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3977 SSA_NAME_DEF_STMT (vec_def), 0);
3978 dump_printf (MSG_NOTE, "\n");
3981 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3982 if (!useless_type_conversion_p (resvectype, vectype))
3984 new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3985 vect_simple_var,
3986 "vec_iv_"),
3987 VIEW_CONVERT_EXPR,
3988 build1 (VIEW_CONVERT_EXPR, resvectype,
3989 induc_def));
3990 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3991 gimple_assign_set_lhs (new_stmt, induc_def);
3992 si = gsi_after_labels (bb);
3993 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3994 set_vinfo_for_stmt (new_stmt,
3995 new_stmt_vec_info (new_stmt, loop_vinfo));
3996 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3997 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
4000 return induc_def;
4004 /* Function get_initial_def_for_reduction
4006 Input:
4007 STMT - a stmt that performs a reduction operation in the loop.
4008 INIT_VAL - the initial value of the reduction variable
4010 Output:
4011 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
4012 of the reduction (used for adjusting the epilog - see below).
4013 Return a vector variable, initialized according to the operation that STMT
4014 performs. This vector will be used as the initial value of the
4015 vector of partial results.
4017 Option1 (adjust in epilog): Initialize the vector as follows:
4018 add/bit or/xor: [0,0,...,0,0]
4019 mult/bit and: [1,1,...,1,1]
4020 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
4021 and when necessary (e.g. add/mult case) let the caller know
4022 that it needs to adjust the result by init_val.
4024 Option2: Initialize the vector as follows:
4025 add/bit or/xor: [init_val,0,0,...,0]
4026 mult/bit and: [init_val,1,1,...,1]
4027 min/max/cond_expr: [init_val,init_val,...,init_val]
4028 and no adjustments are needed.
4030 For example, for the following code:
4032 s = init_val;
4033 for (i=0;i<n;i++)
4034 s = s + a[i];
4036 STMT is 's = s + a[i]', and the reduction variable is 's'.
4037   For a vector of 4 units, we want to return either [init_val,0,0,0]
4038   (Option2), or [0,0,0,0] and let the caller know that it needs to
4039   adjust the result at the end by 'init_val' (Option1).
4041   FORNOW, we use Option1 ('adjust in epilog') when ADJUSTMENT_DEF is not
4042   NULL, because this way the initialization vector is simpler (the same
4043   element in all entries), and Option2 otherwise.
4045 A cost model should help decide between these two schemes. */
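/* Worked example: for a sum reduction with init_val = 5 and 4-element
   vectors, Option1 returns {0, 0, 0, 0} and sets ADJUSTMENT_DEF so that the
   caller adds 5 to the final result, while Option2 returns {5, 0, 0, 0} and
   needs no adjustment.  */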
4047 tree
4048 get_initial_def_for_reduction (gimple *stmt, tree init_val,
4049 tree *adjustment_def)
4051 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
4052 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4053 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4054 tree scalar_type = TREE_TYPE (init_val);
4055 tree vectype = get_vectype_for_scalar_type (scalar_type);
4056 int nunits;
4057 enum tree_code code = gimple_assign_rhs_code (stmt);
4058 tree def_for_init;
4059 tree init_def;
4060 tree *elts;
4061 int i;
4062 bool nested_in_vect_loop = false;
4063 tree init_value;
4064 REAL_VALUE_TYPE real_init_val = dconst0;
4065 int int_init_val = 0;
4066 gimple *def_stmt = NULL;
4068 gcc_assert (vectype);
4069 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4071 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4072 || SCALAR_FLOAT_TYPE_P (scalar_type));
4074 if (nested_in_vect_loop_p (loop, stmt))
4075 nested_in_vect_loop = true;
4076 else
4077 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
4079 /* In case of double reduction we only create a vector variable to be put
4080 in the reduction phi node. The actual statement creation is done in
4081 vect_create_epilog_for_reduction. */
4082 if (adjustment_def && nested_in_vect_loop
4083 && TREE_CODE (init_val) == SSA_NAME
4084 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
4085 && gimple_code (def_stmt) == GIMPLE_PHI
4086 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4087 && vinfo_for_stmt (def_stmt)
4088 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4089 == vect_double_reduction_def)
4091 *adjustment_def = NULL;
4092 return vect_create_destination_var (init_val, vectype);
4095 if (TREE_CONSTANT (init_val))
4097 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4098 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
4099 else
4100 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
4102 else
4103 init_value = init_val;
4105 switch (code)
4107 case WIDEN_SUM_EXPR:
4108 case DOT_PROD_EXPR:
4109 case SAD_EXPR:
4110 case PLUS_EXPR:
4111 case MINUS_EXPR:
4112 case BIT_IOR_EXPR:
4113 case BIT_XOR_EXPR:
4114 case MULT_EXPR:
4115 case BIT_AND_EXPR:
4116       /* ADJUSTMENT_DEF is NULL when called from
4117 vect_create_epilog_for_reduction to vectorize double reduction. */
4118 if (adjustment_def)
4120 if (nested_in_vect_loop)
4121 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt);
4122 else
4123 *adjustment_def = init_val;
4126 if (code == MULT_EXPR)
4128 real_init_val = dconst1;
4129 int_init_val = 1;
4132 if (code == BIT_AND_EXPR)
4133 int_init_val = -1;
4135 if (SCALAR_FLOAT_TYPE_P (scalar_type))
4136 def_for_init = build_real (scalar_type, real_init_val);
4137 else
4138 def_for_init = build_int_cst (scalar_type, int_init_val);
4140       /* Fill all elements except the first with '0' or '1' (DEF_FOR_INIT).  */
4141 elts = XALLOCAVEC (tree, nunits);
4142 for (i = nunits - 2; i >= 0; --i)
4143 elts[i + 1] = def_for_init;
4145 /* Option1: the first element is '0' or '1' as well. */
4146 if (adjustment_def)
4148 elts[0] = def_for_init;
4149 init_def = build_vector (vectype, elts);
4150 break;
4153 /* Option2: the first element is INIT_VAL. */
4154 elts[0] = init_val;
4155 if (TREE_CONSTANT (init_val))
4156 init_def = build_vector (vectype, elts);
4157 else
4159 vec<constructor_elt, va_gc> *v;
4160 vec_alloc (v, nunits);
4161 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4162 for (i = 1; i < nunits; ++i)
4163 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4164 init_def = build_constructor (vectype, v);
4167 break;
4169 case MIN_EXPR:
4170 case MAX_EXPR:
4171 case COND_EXPR:
4172 if (adjustment_def)
4174 *adjustment_def = NULL_TREE;
4175 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4177 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4178 break;
4181 init_def = build_vector_from_val (vectype, init_value);
4182 break;
4184 default:
4185 gcc_unreachable ();
4188 return init_def;
4191 /* Function vect_create_epilog_for_reduction
4193 Create code at the loop-epilog to finalize the result of a reduction
4194 computation.
4196 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4197 reduction statements.
4198 STMT is the scalar reduction stmt that is being vectorized.
4199 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4200 number of elements that we can fit in a vectype (nunits). In this case
4201 we have to generate more than one vector stmt - i.e - we need to "unroll"
4202 the vector stmt by a factor VF/nunits. For more details see documentation
4203 in vectorizable_operation.
4204 REDUC_CODE is the tree-code for the epilog reduction.
4205 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4206 computation.
4207 REDUC_INDEX is the index of the operand in the right hand side of the
4208 statement that is defined by REDUCTION_PHI.
4209 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4210 SLP_NODE is an SLP node containing a group of reduction statements. The
4211 first one in this group is STMT.
4212 INDUCTION_INDEX is the index of the loop for condition reductions.
4213 Otherwise it is undefined.
4215 This function:
4216 1. Creates the reduction def-use cycles: sets the arguments for
4217 REDUCTION_PHIS:
4218 The loop-entry argument is the vectorized initial-value of the reduction.
4219 The loop-latch argument is taken from VECT_DEFS - the vector of partial
4220 sums.
4221 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4222 by applying the operation specified by REDUC_CODE if available, or by
4223 other means (whole-vector shifts or a scalar loop).
4224 The function also creates a new phi node at the loop exit to preserve
4225 loop-closed form, as illustrated below.
4227 The flow at the entry to this function:
4229 loop:
4230 vec_def = phi <null, null> # REDUCTION_PHI
4231 VECT_DEF = vector_stmt # vectorized form of STMT
4232 s_loop = scalar_stmt # (scalar) STMT
4233 loop_exit:
4234 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4235 use <s_out0>
4236 use <s_out0>
4238 The above is transformed by this function into:
4240 loop:
4241 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4242 VECT_DEF = vector_stmt # vectorized form of STMT
4243 s_loop = scalar_stmt # (scalar) STMT
4244 loop_exit:
4245 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4246 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4247 v_out2 = reduce <v_out1>
4248 s_out3 = extract_field <v_out2, 0>
4249 s_out4 = adjust_result <s_out3>
4250 use <s_out4>
4251 use <s_out4>
4254 static void
4255 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4256 int ncopies, enum tree_code reduc_code,
4257 vec<gimple *> reduction_phis,
4258 int reduc_index, bool double_reduc,
4259 slp_tree slp_node, tree induction_index)
4261 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4262 stmt_vec_info prev_phi_info;
4263 tree vectype;
4264 machine_mode mode;
4265 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4266 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4267 basic_block exit_bb;
4268 tree scalar_dest;
4269 tree scalar_type;
4270 gimple *new_phi = NULL, *phi;
4271 gimple_stmt_iterator exit_gsi;
4272 tree vec_dest;
4273 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4274 gimple *epilog_stmt = NULL;
4275 enum tree_code code = gimple_assign_rhs_code (stmt);
4276 gimple *exit_phi;
4277 tree bitsize;
4278 tree adjustment_def = NULL;
4279 tree vec_initial_def = NULL;
4280 tree reduction_op, expr, def, initial_def = NULL;
4281 tree orig_name, scalar_result;
4282 imm_use_iterator imm_iter, phi_imm_iter;
4283 use_operand_p use_p, phi_use_p;
4284 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4285 bool nested_in_vect_loop = false;
4286 auto_vec<gimple *> new_phis;
4287 auto_vec<gimple *> inner_phis;
4288 enum vect_def_type dt = vect_unknown_def_type;
4289 int j, i;
4290 auto_vec<tree> scalar_results;
4291 unsigned int group_size = 1, k, ratio;
4292 auto_vec<tree> vec_initial_defs;
4293 auto_vec<gimple *> phis;
4294 bool slp_reduc = false;
4295 tree new_phi_result;
4296 gimple *inner_phi = NULL;
4298 if (slp_node)
4299 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4301 if (nested_in_vect_loop_p (loop, stmt))
4303 outer_loop = loop;
4304 loop = loop->inner;
4305 nested_in_vect_loop = true;
4306 gcc_assert (!slp_node);
4309 reduction_op = get_reduction_op (stmt, reduc_index);
4311 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4312 gcc_assert (vectype);
4313 mode = TYPE_MODE (vectype);
4315 /* 1. Create the reduction def-use cycle:
4316 Set the arguments of REDUCTION_PHIS, i.e., transform
4318 loop:
4319 vec_def = phi <null, null> # REDUCTION_PHI
4320 VECT_DEF = vector_stmt # vectorized form of STMT
4323 into:
4325 loop:
4326 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4327 VECT_DEF = vector_stmt # vectorized form of STMT
4330 (in case of SLP, do it for all the phis). */
4332 /* Get the loop-entry arguments. */
4333 if (slp_node)
4334 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4335 NULL, slp_node, reduc_index);
4336 else
4338 /* Get at the scalar def before the loop, that defines the initial value
4339 of the reduction variable. */
4340 gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4341 initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4342 loop_preheader_edge (loop));
4343 vec_initial_defs.create (1);
4344 vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4345 &adjustment_def);
4346 vec_initial_defs.quick_push (vec_initial_def);
4349 /* Set phi nodes arguments. */
4350 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4352 tree vec_init_def, def;
4353 gimple_seq stmts;
4354 vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4355 true, NULL_TREE);
4356 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4357 def = vect_defs[i];
4358 for (j = 0; j < ncopies; j++)
4360 /* Set the loop-entry arg of the reduction-phi. */
4362 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4363 == INTEGER_INDUC_COND_REDUCTION)
4365	      /* Initialize the reduction phi to zero.  This prevents non-zero
4366		 initial values from interfering with the reduction op.  */
4367 gcc_assert (ncopies == 1);
4368 gcc_assert (i == 0);
4370 tree vec_init_def_type = TREE_TYPE (vec_init_def);
4371 tree zero_vec = build_zero_cst (vec_init_def_type);
4373 add_phi_arg (as_a <gphi *> (phi), zero_vec,
4374 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4376 else
4377 add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4378 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4380 /* Set the loop-latch arg for the reduction-phi. */
4381 if (j > 0)
4382 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4384 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4385 UNKNOWN_LOCATION);
4387 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_NOTE, vect_location,
4390 "transform reduction: created def-use cycle: ");
4391 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4392 dump_printf (MSG_NOTE, "\n");
4393 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4394 dump_printf (MSG_NOTE, "\n");
4397 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4401 /* 2. Create epilog code.
4402 The reduction epilog code operates across the elements of the vector
4403 of partial results computed by the vectorized loop.
4404 The reduction epilog code consists of:
4406 step 1: compute the scalar result in a vector (v_out2)
4407 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4408 step 3: adjust the scalar result (s_out3) if needed.
4410      Step 1 can be accomplished using one of the following three schemes:
4411 (scheme 1) using reduc_code, if available.
4412 (scheme 2) using whole-vector shifts, if available.
4413 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4414 combined.
4416 The overall epilog code looks like this:
4418 s_out0 = phi <s_loop> # original EXIT_PHI
4419 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4420 v_out2 = reduce <v_out1> # step 1
4421 s_out3 = extract_field <v_out2, 0> # step 2
4422 s_out4 = adjust_result <s_out3> # step 3
4424 (step 3 is optional, and steps 1 and 2 may be combined).
4425 Lastly, the uses of s_out0 are replaced by s_out4. */
4428 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4429 v_out1 = phi <VECT_DEF>
4430 Store them in NEW_PHIS. */
4432 exit_bb = single_exit (loop)->dest;
4433 prev_phi_info = NULL;
4434 new_phis.create (vect_defs.length ());
4435 FOR_EACH_VEC_ELT (vect_defs, i, def)
4437 for (j = 0; j < ncopies; j++)
4439 tree new_def = copy_ssa_name (def);
4440 phi = create_phi_node (new_def, exit_bb);
4441 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4442 if (j == 0)
4443 new_phis.quick_push (phi);
4444 else
4446 def = vect_get_vec_def_for_stmt_copy (dt, def);
4447 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4450 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4451 prev_phi_info = vinfo_for_stmt (phi);
4455 /* The epilogue is created for the outer-loop, i.e., for the loop being
4456 vectorized. Create exit phis for the outer loop. */
4457 if (double_reduc)
4459 loop = outer_loop;
4460 exit_bb = single_exit (loop)->dest;
4461 inner_phis.create (vect_defs.length ());
4462 FOR_EACH_VEC_ELT (new_phis, i, phi)
4464 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4465 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4466 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4467 PHI_RESULT (phi));
4468 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4469 loop_vinfo));
4470 inner_phis.quick_push (phi);
4471 new_phis[i] = outer_phi;
4472 prev_phi_info = vinfo_for_stmt (outer_phi);
4473 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4475 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4476 new_result = copy_ssa_name (PHI_RESULT (phi));
4477 outer_phi = create_phi_node (new_result, exit_bb);
4478 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4479 PHI_RESULT (phi));
4480 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4481 loop_vinfo));
4482 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4483 prev_phi_info = vinfo_for_stmt (outer_phi);
4488 exit_gsi = gsi_after_labels (exit_bb);
4490 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4491 (i.e. when reduc_code is not available) and in the final adjustment
4492 code (if needed). Also get the original scalar reduction variable as
4493 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4494 represents a reduction pattern), the tree-code and scalar-def are
4495 taken from the original stmt that the pattern-stmt (STMT) replaces.
4496 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4497 are taken from STMT. */
4499 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4500 if (!orig_stmt)
4502 /* Regular reduction */
4503 orig_stmt = stmt;
4505 else
4507 /* Reduction pattern */
4508 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4509 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4510 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4513 code = gimple_assign_rhs_code (orig_stmt);
4514 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4515 partial results are added and not subtracted. */
4516 if (code == MINUS_EXPR)
4517 code = PLUS_EXPR;
4519 scalar_dest = gimple_assign_lhs (orig_stmt);
4520 scalar_type = TREE_TYPE (scalar_dest);
4521 scalar_results.create (group_size);
4522 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4523 bitsize = TYPE_SIZE (scalar_type);
4525 /* In case this is a reduction in an inner-loop while vectorizing an outer
4526 loop - we don't need to extract a single scalar result at the end of the
4527 inner-loop (unless it is double reduction, i.e., the use of reduction is
4528 outside the outer-loop). The final vector of partial results will be used
4529 in the vectorized outer-loop, or reduced to a scalar result at the end of
4530 the outer-loop. */
4531 if (nested_in_vect_loop && !double_reduc)
4532 goto vect_finalize_reduction;
4534 /* SLP reduction without reduction chain, e.g.,
4535 # a1 = phi <a2, a0>
4536 # b1 = phi <b2, b0>
4537 a2 = operation (a1)
4538 b2 = operation (b1) */
4539 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4541 /* In case of reduction chain, e.g.,
4542 # a1 = phi <a3, a0>
4543 a2 = operation (a1)
4544 a3 = operation (a2),
4546 we may end up with more than one vector result. Here we reduce them to
4547 one vector. */
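  /* E.g., for the chain a1 -> a2 -> a3 above, vectorized with two vector
     stmts, the two partial-result vectors are combined with the reduction
     operation below before the scalar epilogue reduction is emitted.  */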
4548 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4550 tree first_vect = PHI_RESULT (new_phis[0]);
4551 tree tmp;
4552 gassign *new_vec_stmt = NULL;
4554 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4555 for (k = 1; k < new_phis.length (); k++)
4557 gimple *next_phi = new_phis[k];
4558 tree second_vect = PHI_RESULT (next_phi);
4560 tmp = build2 (code, vectype, first_vect, second_vect);
4561 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4562 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4563 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4564 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4567 new_phi_result = first_vect;
4568 if (new_vec_stmt)
4570 new_phis.truncate (0);
4571 new_phis.safe_push (new_vec_stmt);
4574 else
4575 new_phi_result = PHI_RESULT (new_phis[0]);
4577 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4579 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4580 various data values where the condition matched and another vector
4581 (INDUCTION_INDEX) containing all the indexes of those matches. We
4582 need to extract the last matching index (which will be the index with
4583 highest value) and use this to index into the data vector.
4584 For the case where there were no matches, the data vector will contain
4585 all default values and the index vector will be all zeros. */
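      /* Worked example: with index vector {0, 3, 7, 2} and data vector
	 {d0, a, b, c}, the maximum index is 7, so the comparison below selects
	 lane 2, the VEC_COND yields {0, 0, b, 0}, and the final unsigned MAX
	 reduction extracts b.  */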
4587 /* Get various versions of the type of the vector of indexes. */
4588 tree index_vec_type = TREE_TYPE (induction_index);
4589 gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4590 tree index_scalar_type = TREE_TYPE (index_vec_type);
4591 tree index_vec_cmp_type = build_same_sized_truth_vector_type
4592 (index_vec_type);
4594 /* Get an unsigned integer version of the type of the data vector. */
4595 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4596 tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4597 tree vectype_unsigned = build_vector_type
4598 (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4600 /* First we need to create a vector (ZERO_VEC) of zeros and another
4601 vector (MAX_INDEX_VEC) filled with the last matching index, which we
4602 can create using a MAX reduction and then expanding.
4603 In the case where the loop never made any matches, the max index will
4604 be zero. */
4606 /* Vector of {0, 0, 0,...}. */
4607 tree zero_vec = make_ssa_name (vectype);
4608 tree zero_vec_rhs = build_zero_cst (vectype);
4609 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4610 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4612 /* Find maximum value from the vector of found indexes. */
4613 tree max_index = make_ssa_name (index_scalar_type);
4614 gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4615 induction_index);
4616 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4618 /* Vector of {max_index, max_index, max_index,...}. */
4619 tree max_index_vec = make_ssa_name (index_vec_type);
4620 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4621 max_index);
4622 gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4623 max_index_vec_rhs);
4624 gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4626 /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4627 with the vector (INDUCTION_INDEX) of found indexes, choosing values
4628 from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4629 otherwise. Only one value should match, resulting in a vector
4630 (VEC_COND) with one data value and the rest zeros.
4631 In the case where the loop never made any matches, every index will
4632 match, resulting in a vector with all data values (which will all be
4633 the default value). */
4635 /* Compare the max index vector to the vector of found indexes to find
4636 the position of the max value. */
4637 tree vec_compare = make_ssa_name (index_vec_cmp_type);
4638 gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4639 induction_index,
4640 max_index_vec);
4641 gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4643 /* Use the compare to choose either values from the data vector or
4644 zero. */
4645 tree vec_cond = make_ssa_name (vectype);
4646 gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4647 vec_compare, new_phi_result,
4648 zero_vec);
4649 gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4651 /* Finally we need to extract the data value from the vector (VEC_COND)
4652	 into a scalar (MATCHED_DATA_REDUC).  Logically we want to do an OR
4653 reduction, but because this doesn't exist, we can use a MAX reduction
4654 instead. The data value might be signed or a float so we need to cast
4655 it first.
4656 In the case where the loop never made any matches, the data values are
4657 all identical, and so will reduce down correctly. */
4659 /* Make the matched data values unsigned. */
4660 tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4661 tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4662 vec_cond);
4663 gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4664 VIEW_CONVERT_EXPR,
4665 vec_cond_cast_rhs);
4666 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4668 /* Reduce down to a scalar value. */
4669 tree data_reduc = make_ssa_name (scalar_type_unsigned);
4670 optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4671 optab_default);
4672 gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4673 != CODE_FOR_nothing);
4674 gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4675 REDUC_MAX_EXPR,
4676 vec_cond_cast);
4677 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4679 /* Convert the reduced value back to the result type and set as the
4680 result. */
4681 tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4682 data_reduc);
4683 epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4684 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4685 gimple_assign_set_lhs (epilog_stmt, new_temp);
4686 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4687 scalar_results.safe_push (new_temp);
4690 /* 2.3 Create the reduction code, using one of the three schemes described
4691 above. In SLP we simply need to extract all the elements from the
4692 vector (without reducing them), so we use scalar shifts. */
4693 else if (reduc_code != ERROR_MARK && !slp_reduc)
4695 tree tmp;
4696 tree vec_elem_type;
4698 /*** Case 1: Create:
4699 v_out2 = reduc_expr <v_out1> */
4701 if (dump_enabled_p ())
4702 dump_printf_loc (MSG_NOTE, vect_location,
4703 "Reduce using direct vector reduction.\n");
4705 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4706 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4708 tree tmp_dest =
4709 vect_create_destination_var (scalar_dest, vec_elem_type);
4710 tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4711 epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4712 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4713 gimple_assign_set_lhs (epilog_stmt, new_temp);
4714 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4716 tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4718 else
4719 tmp = build1 (reduc_code, scalar_type, new_phi_result);
4721 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4722 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4723 gimple_assign_set_lhs (epilog_stmt, new_temp);
4724 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4726 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4727 == INTEGER_INDUC_COND_REDUCTION)
4729 /* Earlier we set the initial value to be zero. Check the result
4730 and if it is zero then replace with the original initial
4731 value. */
4732 tree zero = build_zero_cst (scalar_type);
4733 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4735 tmp = make_ssa_name (new_scalar_dest);
4736 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4737 initial_def, new_temp);
4738 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4739 new_temp = tmp;
4742 scalar_results.safe_push (new_temp);
4744 else
4746 bool reduce_with_shift = have_whole_vector_shift (mode);
4747 int element_bitsize = tree_to_uhwi (bitsize);
4748 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4749 tree vec_temp;
4751 /* Regardless of whether we have a whole vector shift, if we're
4752 emulating the operation via tree-vect-generic, we don't want
4753 to use it. Only the first round of the reduction is likely
4754 to still be profitable via emulation. */
4755 /* ??? It might be better to emit a reduction tree code here, so that
4756 tree-vect-generic can expand the first round via bit tricks. */
4757 if (!VECTOR_MODE_P (mode))
4758 reduce_with_shift = false;
4759 else
4761 optab optab = optab_for_tree_code (code, vectype, optab_default);
4762 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4763 reduce_with_shift = false;
4766 if (reduce_with_shift && !slp_reduc)
4768 int nelements = vec_size_in_bits / element_bitsize;
4769 unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4771 int elt_offset;
4773 tree zero_vec = build_zero_cst (vectype);
4774 /*** Case 2: Create:
4775 for (offset = nelements/2; offset >= 1; offset/=2)
4777 Create: va' = vec_shift <va, offset>
4778 Create: va = vop <va, va'>
4779 } */
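	  /* E.g., for a 4-element vector the loop below performs two rounds
	     (offset 2, then offset 1); after the last round the element at
	     bit offset 0 holds the complete reduction and is extracted in
	     step 2.4 just below.  */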
4781 tree rhs;
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_NOTE, vect_location,
4785 "Reduce using vector shifts\n");
4787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4788 new_temp = new_phi_result;
4789 for (elt_offset = nelements / 2;
4790 elt_offset >= 1;
4791 elt_offset /= 2)
4793 calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4794 tree mask = vect_gen_perm_mask_any (vectype, sel);
4795 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4796 new_temp, zero_vec, mask);
4797 new_name = make_ssa_name (vec_dest, epilog_stmt);
4798 gimple_assign_set_lhs (epilog_stmt, new_name);
4799 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4801 epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4802 new_temp);
4803 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4804 gimple_assign_set_lhs (epilog_stmt, new_temp);
4805 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4808 /* 2.4 Extract the final scalar result. Create:
4809 s_out3 = extract_field <v_out2, bitpos> */
4811 if (dump_enabled_p ())
4812 dump_printf_loc (MSG_NOTE, vect_location,
4813 "extract scalar result\n");
4815 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4816 bitsize, bitsize_zero_node);
4817 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4818 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4819 gimple_assign_set_lhs (epilog_stmt, new_temp);
4820 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4821 scalar_results.safe_push (new_temp);
4823 else
4825 /*** Case 3: Create:
4826 s = extract_field <v_out2, 0>
4827 for (offset = element_size;
4828 offset < vector_size;
4829 offset += element_size;)
4831 Create: s' = extract_field <v_out2, offset>
4832 Create: s = op <s, s'> // For non SLP cases
4833 } */
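	  /* E.g., for one V4SI partial result this emits four BIT_FIELD_REF
	     extracts and, in the non-SLP case, three scalar reduction ops
	     chaining the extracted values together.  */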
4835 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_NOTE, vect_location,
4837 "Reduce using scalar code.\n");
4839 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4840 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4842 int bit_offset;
4843 if (gimple_code (new_phi) == GIMPLE_PHI)
4844 vec_temp = PHI_RESULT (new_phi);
4845 else
4846 vec_temp = gimple_assign_lhs (new_phi);
4847 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4848 bitsize_zero_node);
4849 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4850 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4851 gimple_assign_set_lhs (epilog_stmt, new_temp);
4852 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4854	      /* In SLP we don't need to apply the reduction operation, so we
4855		 just collect the s' values in SCALAR_RESULTS.  */
4856 if (slp_reduc)
4857 scalar_results.safe_push (new_temp);
4859 for (bit_offset = element_bitsize;
4860 bit_offset < vec_size_in_bits;
4861 bit_offset += element_bitsize)
4863 tree bitpos = bitsize_int (bit_offset);
4864 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4865 bitsize, bitpos);
4867 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4868 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4869 gimple_assign_set_lhs (epilog_stmt, new_name);
4870 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4872 if (slp_reduc)
4874		  /* In SLP we don't need to apply the reduction operation,
4875		     so we just collect the s' values in SCALAR_RESULTS.  */
4876 new_temp = new_name;
4877 scalar_results.safe_push (new_name);
4879 else
4881 epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4882 new_name, new_temp);
4883 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4884 gimple_assign_set_lhs (epilog_stmt, new_temp);
4885 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4890      /* The only case where we need to reduce scalar results in SLP is
4891	 unrolling.  If the size of SCALAR_RESULTS is greater than
4892	 GROUP_SIZE, we reduce them by combining elements modulo
4893	 GROUP_SIZE.  */
4894 if (slp_reduc)
4896 tree res, first_res, new_res;
4897 gimple *new_stmt;
4899 /* Reduce multiple scalar results in case of SLP unrolling. */
4900 for (j = group_size; scalar_results.iterate (j, &res);
4901 j++)
4903 first_res = scalar_results[j % group_size];
4904 new_stmt = gimple_build_assign (new_scalar_dest, code,
4905 first_res, res);
4906 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4907 gimple_assign_set_lhs (new_stmt, new_res);
4908 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4909 scalar_results[j % group_size] = new_res;
4912 else
4913 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4914 scalar_results.safe_push (new_temp);
4918 vect_finalize_reduction:
4920 if (double_reduc)
4921 loop = loop->inner;
4923 /* 2.5 Adjust the final result by the initial value of the reduction
4924 variable. (When such adjustment is not needed, then
4925 'adjustment_def' is zero). For example, if code is PLUS we create:
4926 new_temp = loop_exit_def + adjustment_def */
4928 if (adjustment_def)
4930 gcc_assert (!slp_reduc);
4931 if (nested_in_vect_loop)
4933 new_phi = new_phis[0];
4934 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4935 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4936 new_dest = vect_create_destination_var (scalar_dest, vectype);
4938 else
4940 new_temp = scalar_results[0];
4941 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4942 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4943 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4946 epilog_stmt = gimple_build_assign (new_dest, expr);
4947 new_temp = make_ssa_name (new_dest, epilog_stmt);
4948 gimple_assign_set_lhs (epilog_stmt, new_temp);
4949 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4950 if (nested_in_vect_loop)
4952 set_vinfo_for_stmt (epilog_stmt,
4953 new_stmt_vec_info (epilog_stmt, loop_vinfo));
4954 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4955 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4957 if (!double_reduc)
4958 scalar_results.quick_push (new_temp);
4959 else
4960 scalar_results[0] = new_temp;
4962 else
4963 scalar_results[0] = new_temp;
4965 new_phis[0] = epilog_stmt;
4968 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4969 phis with new adjusted scalar results, i.e., replace use <s_out0>
4970 with use <s_out4>.
4972 Transform:
4973 loop_exit:
4974 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4975 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4976 v_out2 = reduce <v_out1>
4977 s_out3 = extract_field <v_out2, 0>
4978 s_out4 = adjust_result <s_out3>
4979 use <s_out0>
4980 use <s_out0>
4982 into:
4984 loop_exit:
4985 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4986 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4987 v_out2 = reduce <v_out1>
4988 s_out3 = extract_field <v_out2, 0>
4989 s_out4 = adjust_result <s_out3>
4990 use <s_out4>
4991 use <s_out4> */
4994   /* For an SLP reduction chain we reduce the vector results into one vector
4995      if necessary, hence we set GROUP_SIZE to 1 here.  SCALAR_DEST is the LHS
4996      of the last stmt in the reduction chain, since we are looking for the
4997      loop-exit phi node.  */
4998 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
5000 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5001 /* Handle reduction patterns. */
5002 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
5003 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5005 scalar_dest = gimple_assign_lhs (dest_stmt);
5006 group_size = 1;
5009   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5010      case GROUP_SIZE is greater than the vectorization factor).  Therefore, we
5011      need to match SCALAR_RESULTS with the corresponding statements.  The first
5012      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
5013      the first vector stmt, etc.
5014      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
5015 if (group_size > new_phis.length ())
5017 ratio = group_size / new_phis.length ();
5018 gcc_assert (!(group_size % new_phis.length ()));
5020 else
5021 ratio = 1;
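  /* E.g., with GROUP_SIZE = 4 scalar reductions and two new vector stmts,
     RATIO = 2: scalar results 0 and 1 belong to the first vector stmt,
     results 2 and 3 to the second.  */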
5023 for (k = 0; k < group_size; k++)
5025 if (k % ratio == 0)
5027 epilog_stmt = new_phis[k / ratio];
5028 reduction_phi = reduction_phis[k / ratio];
5029 if (double_reduc)
5030 inner_phi = inner_phis[k / ratio];
5033 if (slp_reduc)
5035 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5037 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
5038 /* SLP statements can't participate in patterns. */
5039 gcc_assert (!orig_stmt);
5040 scalar_dest = gimple_assign_lhs (current_stmt);
5043 phis.create (3);
5044 /* Find the loop-closed-use at the loop exit of the original scalar
5045 result. (The reduction result is expected to have two immediate uses -
5046 one at the latch block, and one at the loop exit). */
5047 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5048 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
5049 && !is_gimple_debug (USE_STMT (use_p)))
5050 phis.safe_push (USE_STMT (use_p));
5052 /* While we expect to have found an exit_phi because of loop-closed-ssa
5053 form we can end up without one if the scalar cycle is dead. */
5055 FOR_EACH_VEC_ELT (phis, i, exit_phi)
5057 if (outer_loop)
5059 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5060 gphi *vect_phi;
5062	  /* FORNOW.  We do not yet support the case where an inner-loop
5063	     reduction is used only outside the outer-loop, unless it is
5064	     a double reduction.  */
5065 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5066 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5067 || double_reduc);
5069 if (double_reduc)
5070 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5071 else
5072 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5073 if (!double_reduc
5074 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5075 != vect_double_reduction_def)
5076 continue;
5078 /* Handle double reduction:
5080 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
5081 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5082 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
5083 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
5085 At that point the regular reduction (stmt2 and stmt3) is
5086 already vectorized, as well as the exit phi node, stmt4.
5087 Here we vectorize the phi node of double reduction, stmt1, and
5088 update all relevant statements. */
5090 /* Go through all the uses of s2 to find double reduction phi
5091 node, i.e., stmt1 above. */
5092 orig_name = PHI_RESULT (exit_phi);
5093 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5095 stmt_vec_info use_stmt_vinfo;
5096 stmt_vec_info new_phi_vinfo;
5097 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5098 basic_block bb = gimple_bb (use_stmt);
5099 gimple *use;
5101 /* Check that USE_STMT is really double reduction phi
5102 node. */
5103 if (gimple_code (use_stmt) != GIMPLE_PHI
5104 || gimple_phi_num_args (use_stmt) != 2
5105 || bb->loop_father != outer_loop)
5106 continue;
5107 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5108 if (!use_stmt_vinfo
5109 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5110 != vect_double_reduction_def)
5111 continue;
5113 /* Create vector phi node for double reduction:
5114 vs1 = phi <vs0, vs2>
5115 vs1 was created previously in this function by a call to
5116 vect_get_vec_def_for_operand and is stored in
5117 vec_initial_def;
5118 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5119 vs0 is created here. */
5121 /* Create vector phi node. */
5122 vect_phi = create_phi_node (vec_initial_def, bb);
5123 new_phi_vinfo = new_stmt_vec_info (vect_phi,
5124 loop_vec_info_for_loop (outer_loop));
5125 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5127 /* Create vs0 - initial def of the double reduction phi. */
5128 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5129 loop_preheader_edge (outer_loop));
5130 init_def = get_initial_def_for_reduction (stmt,
5131 preheader_arg, NULL);
5132 vect_phi_init = vect_init_vector (use_stmt, init_def,
5133 vectype, NULL);
5135 /* Update phi node arguments with vs0 and vs2. */
5136 add_phi_arg (vect_phi, vect_phi_init,
5137 loop_preheader_edge (outer_loop),
5138 UNKNOWN_LOCATION);
5139 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5140 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5141 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_NOTE, vect_location,
5144 "created double reduction phi node: ");
5145 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5146 dump_printf (MSG_NOTE, "\n");
5149 vect_phi_res = PHI_RESULT (vect_phi);
5151 /* Replace the use, i.e., set the correct vs1 in the regular
5152 reduction phi node. FORNOW, NCOPIES is always 1, so the
5153 loop is redundant. */
5154 use = reduction_phi;
5155 for (j = 0; j < ncopies; j++)
5157 edge pr_edge = loop_preheader_edge (loop);
5158 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5159 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5165 phis.release ();
5166 if (nested_in_vect_loop)
5168 if (double_reduc)
5169 loop = outer_loop;
5170 else
5171 continue;
5174 phis.create (3);
5175 /* Find the loop-closed-use at the loop exit of the original scalar
5176 result. (The reduction result is expected to have two immediate uses,
5177 one at the latch block, and one at the loop exit). For double
5178 reductions we are looking for exit phis of the outer loop. */
5179 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5181 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5183 if (!is_gimple_debug (USE_STMT (use_p)))
5184 phis.safe_push (USE_STMT (use_p));
5186 else
5188 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5190 tree phi_res = PHI_RESULT (USE_STMT (use_p));
5192 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5194 if (!flow_bb_inside_loop_p (loop,
5195 gimple_bb (USE_STMT (phi_use_p)))
5196 && !is_gimple_debug (USE_STMT (phi_use_p)))
5197 phis.safe_push (USE_STMT (phi_use_p));
5203 FOR_EACH_VEC_ELT (phis, i, exit_phi)
5205 /* Replace the uses: */
5206 orig_name = PHI_RESULT (exit_phi);
5207 scalar_result = scalar_results[k];
5208 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5209 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5210 SET_USE (use_p, scalar_result);
5213 phis.release ();
5218 /* Function is_nonwrapping_integer_induction.
5220   Check that the induction defined by STMT (which is part of loop LOOP)
5221   increments and cannot overflow (wrap around).  */
5223 static bool
5224 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5226 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5227 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5228 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5229 tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5230 widest_int ni, max_loop_value, lhs_max;
5231 bool overflow = false;
5233 /* Make sure the loop is integer based. */
5234 if (TREE_CODE (base) != INTEGER_CST
5235 || TREE_CODE (step) != INTEGER_CST)
5236 return false;
5238 /* Check that the induction increments. */
5239 if (tree_int_cst_sgn (step) == -1)
5240 return false;
5242 /* Check that the max size of the loop will not wrap. */
5244 if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5245 return true;
5247 if (! max_stmt_executions (loop, &ni))
5248 return false;
5250 max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5251 &overflow);
5252 if (overflow)
5253 return false;
5255 max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5256 TYPE_SIGN (lhs_type), &overflow);
5257 if (overflow)
5258 return false;
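  /* The induction cannot wrap if BASE + STEP * NITER still fits in the
     precision of the result type.  E.g., an unsigned char IV starting at 100
     with step 1 over at most 27 iterations reaches 127, which needs only
     7 bits and therefore cannot wrap an 8-bit type.  */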
5260 return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5261 <= TYPE_PRECISION (lhs_type));
5264 /* Function vectorizable_reduction.
5266 Check if STMT performs a reduction operation that can be vectorized.
5267 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5268 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5269 Return FALSE if not a vectorizable STMT, TRUE otherwise.
5271 This function also handles reduction idioms (patterns) that have been
5272 recognized in advance during vect_pattern_recog. In this case, STMT may be
5273 of this form:
5274 X = pattern_expr (arg0, arg1, ..., X)
5275   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5276 sequence that had been detected and replaced by the pattern-stmt (STMT).
5278 This function also handles reduction of condition expressions, for example:
5279 for (int i = 0; i < N; i++)
5280 if (a[i] < value)
5281 last = a[i];
5282   This is handled by vectorizing the loop and creating an additional vector
5283 containing the loop indexes for which "a[i] < value" was true. In the
5284 function epilogue this is reduced to a single max value and then used to
5285 index into the vector of results.
5287 In some cases of reduction patterns, the type of the reduction variable X is
5288 different than the type of the other arguments of STMT.
5289 In such cases, the vectype that is used when transforming STMT into a vector
5290 stmt is different than the vectype that is used to determine the
5291 vectorization factor, because it consists of a different number of elements
5292 than the actual number of elements that are being operated upon in parallel.
5294 For example, consider an accumulation of shorts into an int accumulator.
5295 On some targets it's possible to vectorize this pattern operating on 8
5296 shorts at a time (hence, the vectype for purposes of determining the
5297 vectorization factor should be V8HI); on the other hand, the vectype that
5298 is used to create the vector form is actually V4SI (the type of the result).
5300 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5301   indicates the actual level of parallelism (V8HI in the example), so that
5302   the right vectorization factor is derived.  This vectype
5303 corresponds to the type of arguments to the reduction stmt, and should *NOT*
5304 be used to create the vectorized stmt. The right vectype for the vectorized
5305 stmt is obtained from the type of the result X:
5306 get_vectype_for_scalar_type (TREE_TYPE (X))
5308 This means that, contrary to "regular" reductions (or "regular" stmts in
5309 general), the following equation:
5310 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5311 does *NOT* necessarily hold for reduction patterns. */
5313 bool
5314 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5315 gimple **vec_stmt, slp_tree slp_node)
5317 tree vec_dest;
5318 tree scalar_dest;
5319 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5320 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5321 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5322 tree vectype_in = NULL_TREE;
5323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5324 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5325 enum tree_code code, orig_code, epilog_reduc_code;
5326 machine_mode vec_mode;
5327 int op_type;
5328 optab optab, reduc_optab;
5329 tree new_temp = NULL_TREE;
5330 gimple *def_stmt;
5331 enum vect_def_type dt;
5332 gphi *new_phi = NULL;
5333 tree scalar_type;
5334 bool is_simple_use;
5335 gimple *orig_stmt;
5336 stmt_vec_info orig_stmt_info;
5337 tree expr = NULL_TREE;
5338 int i;
5339 int ncopies;
5340 int epilog_copies;
5341 stmt_vec_info prev_stmt_info, prev_phi_info;
5342 bool single_defuse_cycle = false;
5343 tree reduc_def = NULL_TREE;
5344 gimple *new_stmt = NULL;
5345 int j;
5346 tree ops[3];
5347 bool nested_cycle = false, found_nested_cycle_def = false;
5348 gimple *reduc_def_stmt = NULL;
5349 bool double_reduc = false, dummy;
5350 basic_block def_bb;
5351 struct loop * def_stmt_loop, *outer_loop = NULL;
5352 tree def_arg;
5353 gimple *def_arg_stmt;
5354 auto_vec<tree> vec_oprnds0;
5355 auto_vec<tree> vec_oprnds1;
5356 auto_vec<tree> vect_defs;
5357 auto_vec<gimple *> phis;
5358 int vec_num;
5359 tree def0, def1, tem, op0, op1 = NULL_TREE;
5360 bool first_p = true;
5361 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5362 gimple *cond_expr_induction_def_stmt = NULL;
5364   /* In case of a reduction chain we switch to the first stmt in the chain, but
5365      we don't update STMT_INFO, since only the last stmt is marked as a reduction
5366      and has reduction properties.  */
5367 if (GROUP_FIRST_ELEMENT (stmt_info)
5368 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5370 stmt = GROUP_FIRST_ELEMENT (stmt_info);
5371 first_p = false;
5374 if (nested_in_vect_loop_p (loop, stmt))
5376 outer_loop = loop;
5377 loop = loop->inner;
5378 nested_cycle = true;
5381 /* 1. Is vectorizable reduction? */
5382 /* Not supportable if the reduction variable is used in the loop, unless
5383 it's a reduction chain. */
5384 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5385 && !GROUP_FIRST_ELEMENT (stmt_info))
5386 return false;
5388   /* Reductions that are not used even in an enclosing outer-loop
5389 are expected to be "live" (used out of the loop). */
5390 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5391 && !STMT_VINFO_LIVE_P (stmt_info))
5392 return false;
5394 /* Make sure it was already recognized as a reduction computation. */
5395 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5396 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5397 return false;
5399 /* 2. Has this been recognized as a reduction pattern?
5401 Check if STMT represents a pattern that has been recognized
5402 in earlier analysis stages. For stmts that represent a pattern,
5403 the STMT_VINFO_RELATED_STMT field records the last stmt in
5404 the original sequence that constitutes the pattern. */
5406 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5407 if (orig_stmt)
5409 orig_stmt_info = vinfo_for_stmt (orig_stmt);
5410 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5411 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5414 /* 3. Check the operands of the operation. The first operands are defined
5415 inside the loop body. The last operand is the reduction variable,
5416 which is defined by the loop-header-phi. */
5418 gcc_assert (is_gimple_assign (stmt));
5420 /* Flatten RHS. */
5421 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5423 case GIMPLE_SINGLE_RHS:
5424 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5425 if (op_type == ternary_op)
5427 tree rhs = gimple_assign_rhs1 (stmt);
5428 ops[0] = TREE_OPERAND (rhs, 0);
5429 ops[1] = TREE_OPERAND (rhs, 1);
5430 ops[2] = TREE_OPERAND (rhs, 2);
5431 code = TREE_CODE (rhs);
5433 else
5434 return false;
5435 break;
5437 case GIMPLE_BINARY_RHS:
5438 code = gimple_assign_rhs_code (stmt);
5439 op_type = TREE_CODE_LENGTH (code);
5440 gcc_assert (op_type == binary_op);
5441 ops[0] = gimple_assign_rhs1 (stmt);
5442 ops[1] = gimple_assign_rhs2 (stmt);
5443 break;
5445 case GIMPLE_TERNARY_RHS:
5446 code = gimple_assign_rhs_code (stmt);
5447 op_type = TREE_CODE_LENGTH (code);
5448 gcc_assert (op_type == ternary_op);
5449 ops[0] = gimple_assign_rhs1 (stmt);
5450 ops[1] = gimple_assign_rhs2 (stmt);
5451 ops[2] = gimple_assign_rhs3 (stmt);
5452 break;
5454 case GIMPLE_UNARY_RHS:
5455 return false;
5457 default:
5458 gcc_unreachable ();
5460   /* The default is that the reduction variable is the last operand in the statement.  */
5461 int reduc_index = op_type - 1;
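  /* For a MINUS_EXPR reduction, e.g. acc = acc - a[i], the accumulator is
     the first operand rather than the last, so use index 0 instead.  */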
5462 if (code == MINUS_EXPR)
5463 reduc_index = 0;
5465 if (code == COND_EXPR && slp_node)
5466 return false;
5468 scalar_dest = gimple_assign_lhs (stmt);
5469 scalar_type = TREE_TYPE (scalar_dest);
5470 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5471 && !SCALAR_FLOAT_TYPE_P (scalar_type))
5472 return false;
5474 /* Do not try to vectorize bit-precision reductions. */
5475 if ((TYPE_PRECISION (scalar_type)
5476 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5477 return false;
5479 /* All uses but the last are expected to be defined in the loop.
5480 The last use is the reduction variable. In case of nested cycle this
5481 assumption is not true: we use reduc_index to record the index of the
5482 reduction variable. */
5483 for (i = 0; i < op_type; i++)
5485 if (i == reduc_index)
5486 continue;
5488 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5489 if (i == 0 && code == COND_EXPR)
5490 continue;
5492 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5493 &def_stmt, &dt, &tem);
5494 if (!vectype_in)
5495 vectype_in = tem;
5496 gcc_assert (is_simple_use);
5498 if (dt != vect_internal_def
5499 && dt != vect_external_def
5500 && dt != vect_constant_def
5501 && dt != vect_induction_def
5502 && !(dt == vect_nested_cycle && nested_cycle))
5503 return false;
5505 if (dt == vect_nested_cycle)
5507 found_nested_cycle_def = true;
5508 reduc_def_stmt = def_stmt;
5509 reduc_index = i;
5512 if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
5513 cond_expr_induction_def_stmt = def_stmt;
5516 is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5517 &def_stmt, &dt, &tem);
5518 if (!vectype_in)
5519 vectype_in = tem;
5520 gcc_assert (is_simple_use);
5521 if (!found_nested_cycle_def)
5522 reduc_def_stmt = def_stmt;
5524 if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5525 return false;
5527 if (!(dt == vect_reduction_def
5528 || dt == vect_nested_cycle
5529 || ((dt == vect_internal_def || dt == vect_external_def
5530 || dt == vect_constant_def || dt == vect_induction_def)
5531 && nested_cycle && found_nested_cycle_def)))
5533 /* For pattern recognized stmts, orig_stmt might be a reduction,
5534 but some helper statements for the pattern might not, or
5535 might be COND_EXPRs with reduction uses in the condition. */
5536 gcc_assert (orig_stmt);
5537 return false;
5540 enum vect_reduction_type v_reduc_type;
5541 gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5542 !nested_cycle, &dummy, false,
5543 &v_reduc_type);
5545 /* If we have a condition reduction, see if we can simplify it further. */
5546 if (v_reduc_type == COND_REDUCTION
5547 && cond_expr_induction_def_stmt != NULL
5548 && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt, loop))
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_NOTE, vect_location,
5552 "condition expression based on integer induction.\n");
5553 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
5555 else
5556 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5558 if (orig_stmt)
5559 gcc_assert (tmp == orig_stmt
5560 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5561 else
5562 /* We changed STMT to be the first stmt in reduction chain, hence we
5563 check that in this case the first element in the chain is STMT. */
5564 gcc_assert (stmt == tmp
5565 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5567 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5568 return false;
5570 if (slp_node || PURE_SLP_STMT (stmt_info))
5571 ncopies = 1;
5572 else
5573 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5574 / TYPE_VECTOR_SUBPARTS (vectype_in));
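    /* E.g. (a sketch) with a vectorization factor of 8 and a four-element
       input vectype such as V4SI, two copies of the vector statement are
       needed to cover one iteration of the vectorized loop.  */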
5576 gcc_assert (ncopies >= 1);
5578 vec_mode = TYPE_MODE (vectype_in);
5580 if (code == COND_EXPR)
5582 /* Only call during the analysis stage, otherwise we'll lose
5583 STMT_VINFO_TYPE. */
5584 if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5585 ops[reduc_index], 0, NULL))
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5589 "unsupported condition in reduction\n");
5590 return false;
5593 else
5595 /* 4. Supportable by target? */
5597 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5598 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5600         /* Shifts and rotates are only supported by vectorizable_shift,
5601 not vectorizable_reduction. */
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "unsupported shift or rotation.\n");
5605 return false;
5608 /* 4.1. check support for the operation in the loop */
5609 optab = optab_for_tree_code (code, vectype_in, optab_default);
5610 if (!optab)
5612 if (dump_enabled_p ())
5613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5614 "no optab.\n");
5616 return false;
5619 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5621 if (dump_enabled_p ())
5622 dump_printf (MSG_NOTE, "op not supported by target.\n");
5624 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5625 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5626 < vect_min_worthwhile_factor (code))
5627 return false;
5629 if (dump_enabled_p ())
5630 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5633 /* Worthwhile without SIMD support? */
5634 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5635 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5636 < vect_min_worthwhile_factor (code))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5640 "not worthwhile without SIMD support.\n");
5642 return false;
5646 /* 4.2. Check support for the epilog operation.
5648 If STMT represents a reduction pattern, then the type of the
5649 reduction variable may be different than the type of the rest
5650 of the arguments. For example, consider the case of accumulation
5651         of shorts into an int accumulator; the original code:
5652 S1: int_a = (int) short_a;
5653 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5655 was replaced with:
5656 STMT: int_acc = widen_sum <short_a, int_acc>
5658 This means that:
5659 1. The tree-code that is used to create the vector operation in the
5660 epilog code (that reduces the partial results) is not the
5661 tree-code of STMT, but is rather the tree-code of the original
5662            stmt from the pattern that STMT is replacing.  I.e., in the example
5663 above we want to use 'widen_sum' in the loop, but 'plus' in the
5664 epilog.
5665 2. The type (mode) we use to check available target support
5666 for the vector operation to be created in the *epilog*, is
5667 determined by the type of the reduction variable (in the example
5668            above we'd check this: optab_handler (plus_optab, vect_int_mode)).
5669 However the type (mode) we use to check available target support
5670 for the vector operation to be created *inside the loop*, is
5671 determined by the type of the other arguments to STMT (in the
5672 example we'd check this: optab_handler (widen_sum_optab,
5673 vect_short_mode)).
5675 This is contrary to "regular" reductions, in which the types of all
5676 the arguments are the same as the type of the reduction variable.
5677 For "regular" reductions we can therefore use the same vector type
5678 (and also the same tree-code) when generating the epilog code and
5679 when generating the code inside the loop. */
5681 if (orig_stmt)
5683 /* This is a reduction pattern: get the vectype from the type of the
5684 reduction variable, and get the tree-code from orig_stmt. */
5685 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5686 == TREE_CODE_REDUCTION);
5687 orig_code = gimple_assign_rhs_code (orig_stmt);
5688 gcc_assert (vectype_out);
5689 vec_mode = TYPE_MODE (vectype_out);
5691 else
5693       /* Regular reduction: the same vectype and tree-code that are used for
5694          the vector code inside the loop can also be used for the epilog code.  */
5695 orig_code = code;
5697 if (code == MINUS_EXPR)
5698 orig_code = PLUS_EXPR;
5700 /* For simple condition reductions, replace with the actual expression
5701 we want to base our reduction around. */
5702 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5703 == INTEGER_INDUC_COND_REDUCTION)
5704 orig_code = MAX_EXPR;
5707 if (nested_cycle)
5709 def_bb = gimple_bb (reduc_def_stmt);
5710 def_stmt_loop = def_bb->loop_father;
5711 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5712 loop_preheader_edge (def_stmt_loop));
5713 if (TREE_CODE (def_arg) == SSA_NAME
5714 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5715 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5716 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5717 && vinfo_for_stmt (def_arg_stmt)
5718 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5719 == vect_double_reduction_def)
5720 double_reduc = true;
5723 epilog_reduc_code = ERROR_MARK;
5725 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
5726 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5727 == INTEGER_INDUC_COND_REDUCTION)
5729 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5731 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5732 optab_default);
5733 if (!reduc_optab)
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5737 "no optab for reduction.\n");
5739 epilog_reduc_code = ERROR_MARK;
5741 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5743 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5744 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748 "reduc op not supported by target.\n");
5750 epilog_reduc_code = ERROR_MARK;
5754 /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5755 generated in the epilog using multiple expressions. This does not
5756 work for condition reductions. */
5757 if (epilog_reduc_code == ERROR_MARK
5758 && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5759 == INTEGER_INDUC_COND_REDUCTION)
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5763 "no reduc code for scalar code.\n");
5764 return false;
5767 else
5769 if (!nested_cycle || double_reduc)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "no reduc code for scalar code.\n");
5775 return false;
5779 else
5781 int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5782 cr_index_scalar_type = make_unsigned_type (scalar_precision);
5783 cr_index_vector_type = build_vector_type
5784 (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5786 epilog_reduc_code = REDUC_MAX_EXPR;
5787 optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5788 optab_default);
5789 if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5790 == CODE_FOR_nothing)
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794 "reduc max op not supported by target.\n");
5795 return false;
5799 if ((double_reduc
5800 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
5801 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5802 == INTEGER_INDUC_COND_REDUCTION)
5803 && ncopies > 1)
5805 if (dump_enabled_p ())
5806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5807 "multiple types in double reduction or condition "
5808 "reduction.\n");
5809 return false;
5812   /* In case of a widening multiplication by a constant, we update the type
5813 of the constant to be the type of the other operand. We check that the
5814 constant fits the type in the pattern recognition pass. */
5815 if (code == DOT_PROD_EXPR
5816 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5818 if (TREE_CODE (ops[0]) == INTEGER_CST)
5819 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5820 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5821 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5822 else
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5826 "invalid types in dot-prod\n");
5828 return false;
5832 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5834 widest_int ni;
5836 if (! max_loop_iterations (loop, &ni))
5838 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_NOTE, vect_location,
5840 "loop count not known, cannot create cond "
5841 "reduction.\n");
5842 return false;
5844 /* Convert backedges to iterations. */
5845 ni += 1;
5847       /* The additional index will be the same type as the condition.  Check
5848          that the number of loop iterations fits into this type less one
5849          (the zero slot is reserved for the case where there is no match).  */
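      /* E.g. (a rough example) if the reduction result is an 8-bit type,
         the index type is an unsigned 8-bit type, so the check below
         rejects loops that may iterate 255 times or more, index 0 being
         reserved for the no-match case.  */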
5850 tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
5851 if (wi::geu_p (ni, wi::to_widest (max_index)))
5853 if (dump_enabled_p ())
5854 dump_printf_loc (MSG_NOTE, vect_location,
5855 "loop size is greater than data size.\n");
5856 return false;
5860 if (!vec_stmt) /* transformation not required. */
5862 if (first_p
5863 && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5864 reduc_index))
5865 return false;
5866 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5867 return true;
5870 /** Transform. **/
5872 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5875 /* FORNOW: Multiple types are not supported for condition. */
5876 if (code == COND_EXPR)
5877 gcc_assert (ncopies == 1);
5879 /* Create the destination vector */
5880 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5882 /* In case the vectorization factor (VF) is bigger than the number
5883 of elements that we can fit in a vectype (nunits), we have to generate
5884 more than one vector stmt - i.e - we need to "unroll" the
5885 vector stmt by a factor VF/nunits. For more details see documentation
5886 in vectorizable_operation. */
5888 /* If the reduction is used in an outer loop we need to generate
5889 VF intermediate results, like so (e.g. for ncopies=2):
5890 r0 = phi (init, r0)
5891 r1 = phi (init, r1)
5892 r0 = x0 + r0;
5893 r1 = x1 + r1;
5894 (i.e. we generate VF results in 2 registers).
5895 In this case we have a separate def-use cycle for each copy, and therefore
5896 for each copy we get the vector def for the reduction variable from the
5897 respective phi node created for this copy.
5899 Otherwise (the reduction is unused in the loop nest), we can combine
5900 together intermediate results, like so (e.g. for ncopies=2):
5901 r = phi (init, r)
5902 r = x0 + r;
5903 r = x1 + r;
5904 (i.e. we generate VF/2 results in a single register).
5905 In this case for each copy we get the vector def for the reduction variable
5906 from the vectorized reduction operation generated in the previous iteration.
5909 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5911 single_defuse_cycle = true;
5912 epilog_copies = 1;
5914 else
5915 epilog_copies = ncopies;
5917 prev_stmt_info = NULL;
5918 prev_phi_info = NULL;
5919 if (slp_node)
5920 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5921 else
5923 vec_num = 1;
5924 vec_oprnds0.create (1);
5925 if (op_type == ternary_op)
5926 vec_oprnds1.create (1);
5929 phis.create (vec_num);
5930 vect_defs.create (vec_num);
5931 if (!slp_node)
5932 vect_defs.quick_push (NULL_TREE);
5934 for (j = 0; j < ncopies; j++)
5936 if (j == 0 || !single_defuse_cycle)
5938 for (i = 0; i < vec_num; i++)
5940 /* Create the reduction-phi that defines the reduction
5941 operand. */
5942 new_phi = create_phi_node (vec_dest, loop->header);
5943 set_vinfo_for_stmt (new_phi,
5944 new_stmt_vec_info (new_phi, loop_vinfo));
5945 if (j == 0 || slp_node)
5946 phis.quick_push (new_phi);
5950 if (code == COND_EXPR)
5952 gcc_assert (!slp_node);
5953 vectorizable_condition (stmt, gsi, vec_stmt,
5954 PHI_RESULT (phis[0]),
5955 reduc_index, NULL);
5956 /* Multiple types are not supported for condition. */
5957 break;
5960 /* Handle uses. */
5961 if (j == 0)
5963 op0 = ops[!reduc_index];
5964 if (op_type == ternary_op)
5966 if (reduc_index == 0)
5967 op1 = ops[2];
5968 else
5969 op1 = ops[1];
5972 if (slp_node)
5973 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5974 slp_node, -1);
5975 else
5977 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5978 stmt);
5979 vec_oprnds0.quick_push (loop_vec_def0);
5980 if (op_type == ternary_op)
5982 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
5983 vec_oprnds1.quick_push (loop_vec_def1);
5987 else
5989 if (!slp_node)
5991 enum vect_def_type dt;
5992 gimple *dummy_stmt;
5994 vect_is_simple_use (ops[!reduc_index], loop_vinfo,
5995 &dummy_stmt, &dt);
5996 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5997 loop_vec_def0);
5998 vec_oprnds0[0] = loop_vec_def0;
5999 if (op_type == ternary_op)
6001 vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
6002 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
6003 loop_vec_def1);
6004 vec_oprnds1[0] = loop_vec_def1;
6008 if (single_defuse_cycle)
6009 reduc_def = gimple_assign_lhs (new_stmt);
6011 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
6014 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6016 if (slp_node)
6017 reduc_def = PHI_RESULT (phis[i]);
6018 else
6020 if (!single_defuse_cycle || j == 0)
6021 reduc_def = PHI_RESULT (new_phi);
6024 def1 = ((op_type == ternary_op)
6025 ? vec_oprnds1[i] : NULL);
6026 if (op_type == binary_op)
6028 if (reduc_index == 0)
6029 expr = build2 (code, vectype_out, reduc_def, def0);
6030 else
6031 expr = build2 (code, vectype_out, def0, reduc_def);
6033 else
6035 if (reduc_index == 0)
6036 expr = build3 (code, vectype_out, reduc_def, def0, def1);
6037 else
6039 if (reduc_index == 1)
6040 expr = build3 (code, vectype_out, def0, reduc_def, def1);
6041 else
6042 expr = build3 (code, vectype_out, def0, def1, reduc_def);
6046 new_stmt = gimple_build_assign (vec_dest, expr);
6047 new_temp = make_ssa_name (vec_dest, new_stmt);
6048 gimple_assign_set_lhs (new_stmt, new_temp);
6049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6051 if (slp_node)
6053 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6054 vect_defs.quick_push (new_temp);
6056 else
6057 vect_defs[0] = new_temp;
6060 if (slp_node)
6061 continue;
6063 if (j == 0)
6064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6065 else
6066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6068 prev_stmt_info = vinfo_for_stmt (new_stmt);
6069 prev_phi_info = vinfo_for_stmt (new_phi);
6072 tree indx_before_incr, indx_after_incr, cond_name = NULL;
6074 /* Finalize the reduction-phi (set its arguments) and create the
6075 epilog reduction code. */
6076 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6078 new_temp = gimple_assign_lhs (*vec_stmt);
6079 vect_defs[0] = new_temp;
6081 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6082 which is updated with the current index of the loop for every match of
6083 the original loop's cond_expr (VEC_STMT). This results in a vector
6084 containing the last time the condition passed for that vector lane.
6085 The first match will be a 1 to allow 0 to be used for non-matching
6086 indexes. If there are no matches at all then the vector will be all
6087 zeroes. */
6088 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6090 int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6091 int k;
6093 gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6095 /* First we create a simple vector induction variable which starts
6096 with the values {1,2,3,...} (SERIES_VECT) and increments by the
6097 vector size (STEP). */
6099 /* Create a {1,2,3,...} vector. */
6100 tree *vtemp = XALLOCAVEC (tree, nunits_out);
6101 for (k = 0; k < nunits_out; ++k)
6102 vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6103 tree series_vect = build_vector (cr_index_vector_type, vtemp);
6105 /* Create a vector of the step value. */
6106 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6107 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
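      /* For instance, with nunits_out == 4 the induction values are
         {1, 2, 3, 4} on the first vector iteration, {5, 6, 7, 8} on the
         second, and so on.  */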
6109 /* Create an induction variable. */
6110 gimple_stmt_iterator incr_gsi;
6111 bool insert_after;
6112 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6113 create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6114 insert_after, &indx_before_incr, &indx_after_incr);
6116 /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6117 filled with zeros (VEC_ZERO). */
6119 /* Create a vector of 0s. */
6120 tree zero = build_zero_cst (cr_index_scalar_type);
6121 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6123 /* Create a vector phi node. */
6124 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6125 new_phi = create_phi_node (new_phi_tree, loop->header);
6126 set_vinfo_for_stmt (new_phi,
6127 new_stmt_vec_info (new_phi, loop_vinfo));
6128 add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6129 UNKNOWN_LOCATION);
6131       /* Now take the condition from the loop's original cond_expr
6132 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6133 every match uses values from the induction variable
6134 (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6135 (NEW_PHI_TREE).
6136 Finally, we update the phi (NEW_PHI_TREE) to take the value of
6137 the new cond_expr (INDEX_COND_EXPR). */
6139 /* Turn the condition from vec_stmt into an ssa name. */
6140 gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
6141 tree ccompare = gimple_assign_rhs1 (*vec_stmt);
6142 tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
6143 gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
6144 ccompare);
6145 gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
6146 gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
6147 update_stmt (*vec_stmt);
6149       /* Create a conditional where the condition is taken from vec_stmt
6150          (CCOMPARE_NAME), the "then" value is the induction index
6151          (INDEX_BEFORE_INCR) and the "else" value is the phi (NEW_PHI_TREE).  */
6152 tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6153 ccompare_name, indx_before_incr,
6154 new_phi_tree);
6155 cond_name = make_ssa_name (cr_index_vector_type);
6156 gimple *index_condition = gimple_build_assign (cond_name,
6157 index_cond_expr);
6158 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6159 stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6160 loop_vinfo);
6161 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6162 set_vinfo_for_stmt (index_condition, index_vec_info);
6164 /* Update the phi with the vec cond. */
6165 add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6166 UNKNOWN_LOCATION);
6170 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6171 epilog_reduc_code, phis, reduc_index,
6172 double_reduc, slp_node, cond_name);
6174 return true;
6177 /* Function vect_min_worthwhile_factor.
6179 For a loop where we could vectorize the operation indicated by CODE,
6180 return the minimum vectorization factor that makes it worthwhile
6181 to use generic vectors. */
6183 vect_min_worthwhile_factor (enum tree_code code)
6185 switch (code)
6187 case PLUS_EXPR:
6188 case MINUS_EXPR:
6189 case NEGATE_EXPR:
6190 return 4;
6192 case BIT_AND_EXPR:
6193 case BIT_IOR_EXPR:
6194 case BIT_XOR_EXPR:
6195 case BIT_NOT_EXPR:
6196 return 2;
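    /* For any other code, falling back to generic (word-mode) vectors is
       assumed never to be worthwhile, so return a factor that can never
       be reached.  */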
6198 default:
6199 return INT_MAX;
6204 /* Function vectorizable_induction
6206 Check if PHI performs an induction computation that can be vectorized.
6207 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6208 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
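   As a rough illustration, for a loop like

     for (i = 0; i < N; i++)
       a[i] = i;

   and a vectorization factor of 4, the scalar phi for the induction "i"
   is replaced by a vector phi whose initial value is {0, 1, 2, 3} and
   which is incremented by {4, 4, 4, 4} on each iteration of the
   vectorized loop.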
6209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6211 bool
6212 vectorizable_induction (gimple *phi,
6213 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6214 gimple **vec_stmt)
6216 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6217 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6220 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6221 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6222 tree vec_def;
6224 gcc_assert (ncopies >= 1);
6225 /* FORNOW. These restrictions should be relaxed. */
6226 if (nested_in_vect_loop_p (loop, phi))
6228 imm_use_iterator imm_iter;
6229 use_operand_p use_p;
6230 gimple *exit_phi;
6231 edge latch_e;
6232 tree loop_arg;
6234 if (ncopies > 1)
6236 if (dump_enabled_p ())
6237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6238 "multiple types in nested loop.\n");
6239 return false;
6242 exit_phi = NULL;
6243 latch_e = loop_latch_edge (loop->inner);
6244 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6245 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6247 gimple *use_stmt = USE_STMT (use_p);
6248 if (is_gimple_debug (use_stmt))
6249 continue;
6251 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6253 exit_phi = use_stmt;
6254 break;
6257 if (exit_phi)
6259 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
6260 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6261 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6263 if (dump_enabled_p ())
6264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6265 "inner-loop induction only used outside "
6266 "of the outer vectorized loop.\n");
6267 return false;
6272 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6273 return false;
6275 /* FORNOW: SLP not supported. */
6276 if (STMT_SLP_TYPE (stmt_info))
6277 return false;
6279 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6281 if (gimple_code (phi) != GIMPLE_PHI)
6282 return false;
6284 if (!vec_stmt) /* transformation not required. */
6286 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6287 if (dump_enabled_p ())
6288 dump_printf_loc (MSG_NOTE, vect_location,
6289 "=== vectorizable_induction ===\n");
6290 vect_model_induction_cost (stmt_info, ncopies);
6291 return true;
6294 /** Transform. **/
6296 if (dump_enabled_p ())
6297 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6299 vec_def = get_initial_def_for_induction (phi);
6300 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6301 return true;
6304 /* Function vectorizable_live_operation.
6306 STMT computes a value that is used outside the loop. Check if
6307 it can be supported. */
6309 bool
6310 vectorizable_live_operation (gimple *stmt,
6311 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6312 gimple **vec_stmt)
6314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6317 tree op;
6318 gimple *def_stmt;
6319 ssa_op_iter iter;
6321 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6323 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6324 return false;
6326 if (!is_gimple_assign (stmt))
6328 if (gimple_call_internal_p (stmt)
6329 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
6330 && gimple_call_lhs (stmt)
6331 && loop->simduid
6332 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
6333 && loop->simduid
6334 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
6336 edge e = single_exit (loop);
6337 basic_block merge_bb = e->dest;
6338 imm_use_iterator imm_iter;
6339 use_operand_p use_p;
6340 tree lhs = gimple_call_lhs (stmt);
6342 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
6344 gimple *use_stmt = USE_STMT (use_p);
6345 if (gimple_code (use_stmt) == GIMPLE_PHI
6346 && gimple_bb (use_stmt) == merge_bb)
6348 if (vec_stmt)
6350 tree vfm1
6351 = build_int_cst (unsigned_type_node,
6352 loop_vinfo->vectorization_factor - 1);
6353 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
6355 return true;
6360 return false;
6363 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6364 return false;
6366 /* FORNOW. CHECKME. */
6367 if (nested_in_vect_loop_p (loop, stmt))
6368 return false;
6370 /* FORNOW: support only if all uses are invariant. This means
6371 that the scalar operations can remain in place, unvectorized.
6372 The original last scalar value that they compute will be used. */
6373 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
6375 enum vect_def_type dt = vect_uninitialized_def;
6377 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
6379 if (dump_enabled_p ())
6380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6381 "use not simple.\n");
6382 return false;
6385 if (dt != vect_external_def && dt != vect_constant_def)
6386 return false;
6389 /* No transformation is required for the cases we currently support. */
6390 return true;
6393 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
6395 static void
6396 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6398 ssa_op_iter op_iter;
6399 imm_use_iterator imm_iter;
6400 def_operand_p def_p;
6401 gimple *ustmt;
6403 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6405 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6407 basic_block bb;
6409 if (!is_gimple_debug (ustmt))
6410 continue;
6412 bb = gimple_bb (ustmt);
6414 if (!flow_bb_inside_loop_p (loop, bb))
6416 if (gimple_debug_bind_p (ustmt))
6418 if (dump_enabled_p ())
6419 dump_printf_loc (MSG_NOTE, vect_location,
6420 "killing debug use\n");
6422 gimple_debug_bind_reset_value (ustmt);
6423 update_stmt (ustmt);
6425 else
6426 gcc_unreachable ();
6433 /* This function builds ni_name = number of iterations. Statements
6434 are emitted on the loop preheader edge. */
6436 static tree
6437 vect_build_loop_niters (loop_vec_info loop_vinfo)
6439 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6440 if (TREE_CODE (ni) == INTEGER_CST)
6441 return ni;
6442 else
6444 tree ni_name, var;
6445 gimple_seq stmts = NULL;
6446 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6448 var = create_tmp_var (TREE_TYPE (ni), "niters");
6449 ni_name = force_gimple_operand (ni, &stmts, false, var);
6450 if (stmts)
6451 gsi_insert_seq_on_edge_immediate (pe, stmts);
6453 return ni_name;
6458 /* This function generates the following statements:
6460    ni_name = number of iterations the loop executes
6461 ratio = ni_name / vf
6462 ratio_mult_vf_name = ratio * vf
6464 and places them on the loop preheader edge. */
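/* For example (a rough sketch that ignores the PEELING_FOR_GAPS
   adjustment): with ni_name == 25 and vf == 8 this computes ratio == 3
   and ratio_mult_vf_name == 24, leaving one iteration for the scalar
   epilogue loop.  */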
6466 static void
6467 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6468 tree ni_name,
6469 tree *ratio_mult_vf_name_ptr,
6470 tree *ratio_name_ptr)
6472 tree ni_minus_gap_name;
6473 tree var;
6474 tree ratio_name;
6475 tree ratio_mult_vf_name;
6476 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6477 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
6478 tree log_vf;
6480 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
6482 /* If epilogue loop is required because of data accesses with gaps, we
6483 subtract one iteration from the total number of iterations here for
6484 correct calculation of RATIO. */
6485 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6487 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6488 ni_name,
6489 build_one_cst (TREE_TYPE (ni_name)));
6490 if (!is_gimple_val (ni_minus_gap_name))
6492 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
6493 gimple *stmts = NULL;
6494 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
6495 true, var);
6496 gsi_insert_seq_on_edge_immediate (pe, stmts);
6499 else
6500 ni_minus_gap_name = ni_name;
6502 /* Create: ratio = ni >> log2(vf) */
6503 /* ??? As we have ni == number of latch executions + 1, ni could
6504 have overflown to zero. So avoid computing ratio based on ni
6505 but compute it using the fact that we know ratio will be at least
6506 one, thus via (ni - vf) >> log2(vf) + 1. */
6507 ratio_name
6508 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
6509 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
6510 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
6511 ni_minus_gap_name,
6512 build_int_cst
6513 (TREE_TYPE (ni_name), vf)),
6514 log_vf),
6515 build_int_cst (TREE_TYPE (ni_name), 1));
6516 if (!is_gimple_val (ratio_name))
6518 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
6519 gimple *stmts = NULL;
6520 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
6521 gsi_insert_seq_on_edge_immediate (pe, stmts);
6523 *ratio_name_ptr = ratio_name;
6525 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6527 if (ratio_mult_vf_name_ptr)
6529 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6530 ratio_name, log_vf);
6531 if (!is_gimple_val (ratio_mult_vf_name))
6533 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
6534 gimple *stmts = NULL;
6535 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
6536 true, var);
6537 gsi_insert_seq_on_edge_immediate (pe, stmts);
6539 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6542 return;
6546 /* Function vect_transform_loop.
6548 The analysis phase has determined that the loop is vectorizable.
6549    Vectorize the loop - create vectorized stmts to replace the scalar
6550 stmts in the loop, and update the loop exit condition. */
6552 void
6553 vect_transform_loop (loop_vec_info loop_vinfo)
6555 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6556 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6557 int nbbs = loop->num_nodes;
6558 int i;
6559 tree ratio = NULL;
6560 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6561 bool grouped_store;
6562 bool slp_scheduled = false;
6563 gimple *stmt, *pattern_stmt;
6564 gimple_seq pattern_def_seq = NULL;
6565 gimple_stmt_iterator pattern_def_si = gsi_none ();
6566 bool transform_pattern_stmt = false;
6567 bool check_profitability = false;
6568 int th;
6569 /* Record number of iterations before we started tampering with the profile. */
6570 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
6572 if (dump_enabled_p ())
6573 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6575   /* If the profile is imprecise, we have a chance to fix it up.  */
6576 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6577 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
6579 /* Use the more conservative vectorization threshold. If the number
6580 of iterations is constant assume the cost check has been performed
6581 by our caller. If the threshold makes all loops profitable that
6582 run at least the vectorization factor number of times checking
6583 is pointless, too. */
6584 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6585 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6586 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6588 if (dump_enabled_p ())
6589 dump_printf_loc (MSG_NOTE, vect_location,
6590 "Profitability threshold is %d loop iterations.\n",
6591 th);
6592 check_profitability = true;
6595 /* Version the loop first, if required, so the profitability check
6596 comes first. */
6598 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
6599 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
6601 vect_loop_versioning (loop_vinfo, th, check_profitability);
6602 check_profitability = false;
6605 tree ni_name = vect_build_loop_niters (loop_vinfo);
6606 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
6608 /* Peel the loop if there are data refs with unknown alignment.
6609 Only one data ref with unknown store is allowed. */
6611 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
6613 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
6614 th, check_profitability);
6615 check_profitability = false;
6616 /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
6617 be re-computed. */
6618 ni_name = NULL_TREE;
6621 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6622      compile time constant), or it is a constant that is not divisible by the
6623 vectorization factor, then an epilog loop needs to be created.
6624 We therefore duplicate the loop: the original loop will be vectorized,
6625 and will compute the first (n/VF) iterations. The second copy of the loop
6626 will remain scalar and will compute the remaining (n%VF) iterations.
6627 (VF is the vectorization factor). */
6629 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6630 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6632 tree ratio_mult_vf;
6633 if (!ni_name)
6634 ni_name = vect_build_loop_niters (loop_vinfo);
6635 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6636 &ratio);
6637 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6638 th, check_profitability);
6640 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6641 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6642 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6643 else
6645 if (!ni_name)
6646 ni_name = vect_build_loop_niters (loop_vinfo);
6647 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6650 /* 1) Make sure the loop header has exactly two entries
6651 2) Make sure we have a preheader basic block. */
6653 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6655 split_edge (loop_preheader_edge (loop));
6657   /* FORNOW: the vectorizer supports only loops whose body consists
6658      of one basic block (header + empty latch).  When the vectorizer
6659      supports more involved loop forms, the order in which the BBs are
6660      traversed will need to be reconsidered.  */
6662 for (i = 0; i < nbbs; i++)
6664 basic_block bb = bbs[i];
6665 stmt_vec_info stmt_info;
6667 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6668 gsi_next (&si))
6670 gphi *phi = si.phi ();
6671 if (dump_enabled_p ())
6673 dump_printf_loc (MSG_NOTE, vect_location,
6674 "------>vectorizing phi: ");
6675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6676 dump_printf (MSG_NOTE, "\n");
6678 stmt_info = vinfo_for_stmt (phi);
6679 if (!stmt_info)
6680 continue;
6682 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6683 vect_loop_kill_debug_uses (loop, phi);
6685 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6686 && !STMT_VINFO_LIVE_P (stmt_info))
6687 continue;
6689 if (STMT_VINFO_VECTYPE (stmt_info)
6690 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6691 != (unsigned HOST_WIDE_INT) vectorization_factor)
6692 && dump_enabled_p ())
6693 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6695 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6697 if (dump_enabled_p ())
6698 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6699 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6703 pattern_stmt = NULL;
6704 for (gimple_stmt_iterator si = gsi_start_bb (bb);
6705 !gsi_end_p (si) || transform_pattern_stmt;)
6707 bool is_store;
6709 if (transform_pattern_stmt)
6710 stmt = pattern_stmt;
6711 else
6713 stmt = gsi_stmt (si);
6714 /* During vectorization remove existing clobber stmts. */
6715 if (gimple_clobber_p (stmt))
6717 unlink_stmt_vdef (stmt);
6718 gsi_remove (&si, true);
6719 release_defs (stmt);
6720 continue;
6724 if (dump_enabled_p ())
6726 dump_printf_loc (MSG_NOTE, vect_location,
6727 "------>vectorizing statement: ");
6728 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6729 dump_printf (MSG_NOTE, "\n");
6732 stmt_info = vinfo_for_stmt (stmt);
6734 /* vector stmts created in the outer-loop during vectorization of
6735 stmts in an inner-loop may not have a stmt_info, and do not
6736 need to be vectorized. */
6737 if (!stmt_info)
6739 gsi_next (&si);
6740 continue;
6743 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6744 vect_loop_kill_debug_uses (loop, stmt);
6746 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6747 && !STMT_VINFO_LIVE_P (stmt_info))
6749 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6750 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6751 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6752 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6754 stmt = pattern_stmt;
6755 stmt_info = vinfo_for_stmt (stmt);
6757 else
6759 gsi_next (&si);
6760 continue;
6763 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6764 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6765 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6766 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6767 transform_pattern_stmt = true;
6769 /* If pattern statement has def stmts, vectorize them too. */
6770 if (is_pattern_stmt_p (stmt_info))
6772 if (pattern_def_seq == NULL)
6774 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6775 pattern_def_si = gsi_start (pattern_def_seq);
6777 else if (!gsi_end_p (pattern_def_si))
6778 gsi_next (&pattern_def_si);
6779 if (pattern_def_seq != NULL)
6781 gimple *pattern_def_stmt = NULL;
6782 stmt_vec_info pattern_def_stmt_info = NULL;
6784 while (!gsi_end_p (pattern_def_si))
6786 pattern_def_stmt = gsi_stmt (pattern_def_si);
6787 pattern_def_stmt_info
6788 = vinfo_for_stmt (pattern_def_stmt);
6789 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6790 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6791 break;
6792 gsi_next (&pattern_def_si);
6795 if (!gsi_end_p (pattern_def_si))
6797 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_NOTE, vect_location,
6800 "==> vectorizing pattern def "
6801 "stmt: ");
6802 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6803 pattern_def_stmt, 0);
6804 dump_printf (MSG_NOTE, "\n");
6807 stmt = pattern_def_stmt;
6808 stmt_info = pattern_def_stmt_info;
6810 else
6812 pattern_def_si = gsi_none ();
6813 transform_pattern_stmt = false;
6816 else
6817 transform_pattern_stmt = false;
6820 if (STMT_VINFO_VECTYPE (stmt_info))
6822 unsigned int nunits
6823 = (unsigned int)
6824 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6825 if (!STMT_SLP_TYPE (stmt_info)
6826 && nunits != (unsigned int) vectorization_factor
6827 && dump_enabled_p ())
6828                /* For SLP, VF is set according to the unrolling factor, not
6829                   to the vector size, hence for SLP this message is not valid.  */
6830 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6833 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6834 reached. */
6835 if (STMT_SLP_TYPE (stmt_info))
6837 if (!slp_scheduled)
6839 slp_scheduled = true;
6841 if (dump_enabled_p ())
6842 dump_printf_loc (MSG_NOTE, vect_location,
6843 "=== scheduling SLP instances ===\n");
6845 vect_schedule_slp (loop_vinfo);
6848 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
6849 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6851 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6853 pattern_def_seq = NULL;
6854 gsi_next (&si);
6856 continue;
6860 /* -------- vectorize statement ------------ */
6861 if (dump_enabled_p ())
6862 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6864 grouped_store = false;
6865 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6866 if (is_store)
6868 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6870 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6871 interleaving chain was completed - free all the stores in
6872 the chain. */
6873 gsi_next (&si);
6874 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6876 else
6878 /* Free the attached stmt_vec_info and remove the stmt. */
6879 gimple *store = gsi_stmt (si);
6880 free_stmt_vec_info (store);
6881 unlink_stmt_vdef (store);
6882 gsi_remove (&si, true);
6883 release_defs (store);
6886 /* Stores can only appear at the end of pattern statements. */
6887 gcc_assert (!transform_pattern_stmt);
6888 pattern_def_seq = NULL;
6890 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6892 pattern_def_seq = NULL;
6893 gsi_next (&si);
6895 } /* stmts in BB */
6896 } /* BBs in loop */
6898 slpeel_make_loop_iterate_ntimes (loop, ratio);
6900 /* Reduce loop iterations by the vectorization factor. */
6901 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6902 expected_iterations / vectorization_factor);
6903 loop->nb_iterations_upper_bound
6904 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6905 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6906 && loop->nb_iterations_upper_bound != 0)
6907 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6908 if (loop->any_estimate)
6910 loop->nb_iterations_estimate
6911 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6912 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6913 && loop->nb_iterations_estimate != 0)
6914 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6917 if (dump_enabled_p ())
6919 dump_printf_loc (MSG_NOTE, vect_location,
6920 "LOOP VECTORIZED\n");
6921 if (loop->inner)
6922 dump_printf_loc (MSG_NOTE, vect_location,
6923 "OUTER LOOP VECTORIZED\n");
6924 dump_printf (MSG_NOTE, "\n");