gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "cfghooks.h"
  31 #include "tree-pass.h"
  32 #include "ssa.h"
  33 #include "optabs-tree.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "cfganal.h"
  38 #include "gimplify.h"
  39 #include "gimple-iterator.h"
  40 #include "gimplify-me.h"
  41 #include "tree-ssa-loop-ivopts.h"
  42 #include "tree-ssa-loop-manip.h"
  43 #include "tree-ssa-loop-niter.h"
  44 #include "tree-ssa-loop.h"
  45 #include "cfgloop.h"
  46 #include "params.h"
  47 #include "tree-scalar-evolution.h"
  48 #include "tree-vectorizer.h"
  49 #include "gimple-fold.h"
  50 #include "cgraph.h"
  51 #include "tree-cfg.h"
  52 #include "tree-if-conv.h"
  53
  54 /* Loop Vectorization Pass.
  55
  56    This pass tries to vectorize loops.
  57
  58    For example, the vectorizer transforms the following simple loop:
  59
  60         short a[N]; short b[N]; short c[N]; int i;
  61
  62         for (i=0; i<N; i++){
  63           a[i] = b[i] + c[i];
  64         }
  65
  66    as if it was manually vectorized by rewriting the source code into:
  67
  68         typedef int __attribute__((mode(V8HI))) v8hi;
  69         short a[N];  short b[N]; short c[N];   int i;
  70         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  71         v8hi va, vb, vc;
  72
  73         for (i=0; i<N/8; i++){
  74           vb = pb[i];
  75           vc = pc[i];
  76           va = vb + vc;
  77           pa[i] = va;
  78         }
  79
  80         The main entry to this pass is vectorize_loops(), in which
  81    the vectorizer applies a set of analyses on a given set of loops,
  82    followed by the actual vectorization transformation for the loops that
  83    had successfully passed the analysis phase.
  84         Throughout this pass we make a distinction between two types of
  85    data: scalars (which are represented by SSA_NAMES), and memory references
  86    ("data-refs").  These two types of data require different handling both
  87    during analysis and transformation. The types of data-refs that the
  88    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  89    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  90    accesses are required to have a simple (consecutive) access pattern.
  91
  92    Analysis phase:
  93    ===============
  94         The driver for the analysis phase is vect_analyze_loop().
  95    It applies a set of analyses, some of which rely on the scalar evolution
  96    analyzer (scev) developed by Sebastian Pop.
  97
  98         During the analysis phase the vectorizer records some information
  99    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
 100    loop, as well as general information about the loop as a whole, which is
 101    recorded in a "loop_vec_info" struct attached to each loop.
 102
 103    Transformation phase:
 104    =====================
 105         The loop transformation phase scans all the stmts in the loop, and
 106    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 107    the loop that needs to be vectorized.  It inserts the vector code sequence
 108    just before the scalar stmt S, and records a pointer to the vector code
 109    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 110    attached to S).  This pointer will be used for the vectorization of following
 111    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 112    otherwise, we rely on dead code elimination for removing it.
 113
 114         For example, say stmt S1 was vectorized into stmt VS1:
 115
 116    VS1: vb = px[i];
 117    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 118    S2:  a = b;
 119
 120    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 121    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 122    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 123    resulting sequence would be:
 124
 125    VS1: vb = px[i];
 126    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 127    VS2: va = vb;
 128    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 129
 130         Operands that are not SSA_NAMEs, are data-refs that appear in
 131    load/store operations (like 'x[i]' in S1), and are handled differently.
 132
 133    Target modeling:
 134    =================
 135         Currently the only target specific information that is used is the
 136    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 137    Targets that can support different sizes of vectors, for now will need
 138    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 139    flexibility will be added in the future.
 140
 141         Since we only vectorize operations whose vector form can be
 142    expressed using existing tree codes, to verify that an operation is
 143    supported, the vectorizer checks the relevant optab at the relevant
 144    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 145    the value found is CODE_FOR_nothing, then there's no target support, and
 146    we can't vectorize the stmt.
 147
 148    For additional information on this project see:
 149    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 150 */
 151
     /* Forward declaration of a file-local (static) function; its definition
        is not part of the code preceding this point.  */
 152 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 153
 154 /* Function vect_determine_vectorization_factor
 155
 156    Determine the vectorization factor (VF).  VF is the number of data elements
 157    that are operated upon in parallel in a single iteration of the vectorized
 158    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 159    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 160    elements can fit in a single vector register.
 161
 162    VF is taken to be the largest number of vector elements (nunits) found
 163    among the vector types chosen for the relevant or live PHIs and statements
 164    of the loop.  Analysis fails if, for some statement, the statement's
 165    vectype and the vectype used for the VF computation differ in mode size.
 166
 167    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 168    original loop:
 169         for (i=0; i<N; i++){
 170           a[i] = b[i] + c[i];
 171         }
 172
 173    vectorized loop:
 174         for (i=0; i<N; i+=VF){
 175           a[i:VF] = b[i:VF] + c[i:VF];
 176         }

        On success the function stores VF in LOOP_VINFO_VECT_FACTOR (LOOP_VINFO)
        and returns true.  Along the way it sets STMT_VINFO_VECTYPE for the PHIs
        and statements it examines that did not have one yet; statements that
        produce a boolean ("mask producers") get theirs in a final loop, after
        VF has been recorded.

        It returns false (after emitting a MSG_MISSED_OPTIMIZATION dump message
        when dumping is enabled) if no vector type is available for some scalar
        type, if a statement has no lhs and is neither a call nor an
        IFN_MASK_STORE, if a statement already has a vector mode, if the two
        vector types of a statement differ in mode size, if the resulting VF
        is <= 1, or if a mask type cannot be determined for a mask producer.  */
 177 */
 178
 179 static bool
 180 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 181 {
 182   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 183   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 184   unsigned nbbs = loop->num_nodes;
       /* Largest nunits seen so far; 0 means nothing has contributed yet.  */
 185   unsigned int vectorization_factor = 0;
 186   tree scalar_type = NULL_TREE;
 187   gphi *phi;
 188   tree vectype;
 189   unsigned int nunits;
 190   stmt_vec_info stmt_info;
 191   unsigned i;
 192   HOST_WIDE_INT dummy;
 193   gimple *stmt, *pattern_stmt = NULL;
       /* State of the walk over a pattern stmt and its pattern def sequence.
          It is carried across iterations of the statement loop below.  */
 194   gimple_seq pattern_def_seq = NULL;
 195   gimple_stmt_iterator pattern_def_si = gsi_none ();
 196   bool analyze_pattern_stmt = false;
 197   bool bool_result;
       /* Boolean-producing stmts whose vectype is assigned only after VF has
          been recorded (see the last loop of this function).  */
 198   auto_vec<stmt_vec_info> mask_producers;
 199
 200   if (dump_enabled_p ())
 201     dump_printf_loc (MSG_NOTE, vect_location,
 202                      "=== vect_determine_vectorization_factor ===\n");
 203
       /* Walk every basic block of the loop: first its PHIs, then its
          statements.  */
 204   for (i = 0; i < nbbs; i++)
 205     {
 206       basic_block bb = bbs[i];
 207
           /* PHIs: a relevant or live PHI gets its vectype from the type of
              its result and contributes that vectype's nunits to VF.  */
 208       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
 209            gsi_next (&si))
 210         {
 211           phi = si.phi ();
 212           stmt_info = vinfo_for_stmt (phi);
 213           if (dump_enabled_p ())
 214             {
 215               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 216               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 217             }
 218
 219           gcc_assert (stmt_info);
 220
 221           if (STMT_VINFO_RELEVANT_P (stmt_info)
 222               || STMT_VINFO_LIVE_P (stmt_info))
 223             {
 224               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 225               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 226
 227               if (dump_enabled_p ())
 228                 {
 229                   dump_printf_loc (MSG_NOTE, vect_location,
 230                                    "get vectype for scalar type:  ");
 231                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 232                   dump_printf (MSG_NOTE, "\n");
 233                 }
 234
 235               vectype = get_vectype_for_scalar_type (scalar_type);
 236               if (!vectype)
 237                 {
 238                   if (dump_enabled_p ())
 239                     {
 240                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 241                                        "not vectorized: unsupported "
 242                                        "data-type ");
 243                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 244                                          scalar_type);
 245                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 246                     }
 247                   return false;
 248                 }
 249               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 250
 251               if (dump_enabled_p ())
 252                 {
 253                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 254                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 255                   dump_printf (MSG_NOTE, "\n");
 256                 }
 257
 258               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 259               if (dump_enabled_p ())
 260                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
 261                                  nunits);
 262
                   /* Keep the largest nunits seen so far.  */
 263               if (!vectorization_factor
 264                   || (nunits > vectorization_factor))
 265                 vectorization_factor = nunits;
 266             }
 267         }
 268
           /* Statements.  The for header has no increment: SI is advanced by
              hand, and only when neither a pattern stmt nor a pattern def
              sequence is still pending for the stmt at SI.  Until then the
              same position is visited again, each time analyzing a different
              stmt attached to it.  */
 269       for (gimple_stmt_iterator si = gsi_start_bb (bb);
 270            !gsi_end_p (si) || analyze_pattern_stmt;)
 271         {
 272           tree vf_vectype;
 273
               /* A previous iteration analyzed the original stmt and asked for
                  its pattern stmt to be analyzed next.  */
 274           if (analyze_pattern_stmt)
 275             stmt = pattern_stmt;
 276           else
 277             stmt = gsi_stmt (si);
 278
 279           stmt_info = vinfo_for_stmt (stmt);
 280
 281           if (dump_enabled_p ())
 282             {
 283               dump_printf_loc (MSG_NOTE, vect_location,
 284                                "==> examining statement: ");
 285               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 286             }
 287
 288           gcc_assert (stmt_info);
 289
 290           /* Skip stmts which do not need to be vectorized.  */
 291           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
 292                && !STMT_VINFO_LIVE_P (stmt_info))
 293               || gimple_clobber_p (stmt))
 294             {
                   /* The stmt itself is skipped, but if it is part of a pattern
                      whose related (pattern) stmt is relevant or live, analyze
                      that pattern stmt in its place right away.  */
 295               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 296                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 297                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 298                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 299                 {
 300                   stmt = pattern_stmt;
 301                   stmt_info = vinfo_for_stmt (pattern_stmt);
 302                   if (dump_enabled_p ())
 303                     {
 304                       dump_printf_loc (MSG_NOTE, vect_location,
 305                                        "==> examining pattern statement: ");
 306                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 307                     }
 308                 }
 309               else
 310                 {
 311                   if (dump_enabled_p ())
 312                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
 313                   gsi_next (&si);
 314                   continue;
 315                 }
 316             }
               /* The stmt is analyzed now; because it also has a relevant or
                  live pattern stmt, request that the pattern stmt be analyzed
                  on the next iteration (SI is not advanced meanwhile).  */
 317           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 318                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 319                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 320                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 321             analyze_pattern_stmt = true;
 322
 323           /* If a pattern statement has def stmts, analyze them too.  */
 324           if (is_pattern_stmt_p (stmt_info))
 325             {
                   /* First visit for this pattern stmt: fetch its def sequence.
                      Later visits: step past the def stmt that was analyzed
                      on the previous iteration.  */
 326               if (pattern_def_seq == NULL)
 327                 {
 328                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 329                   pattern_def_si = gsi_start (pattern_def_seq);
 330                 }
 331               else if (!gsi_end_p (pattern_def_si))
 332                 gsi_next (&pattern_def_si);
 333               if (pattern_def_seq != NULL)
 334                 {
 335                   gimple *pattern_def_stmt = NULL;
 336                   stmt_vec_info pattern_def_stmt_info = NULL;
 337
                       /* Find the next def stmt that is relevant or live,
                          skipping the others.  */
 338                   while (!gsi_end_p (pattern_def_si))
 339                     {
 340                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 341                       pattern_def_stmt_info
 342                         = vinfo_for_stmt (pattern_def_stmt);
 343                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 344                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 345                         break;
 346                       gsi_next (&pattern_def_si);
 347                     }
 348
                       /* One was found: this iteration analyzes that def stmt
                          instead of the pattern stmt.  */
 349                   if (!gsi_end_p (pattern_def_si))
 350                     {
 351                       if (dump_enabled_p ())
 352                         {
 353                           dump_printf_loc (MSG_NOTE, vect_location,
 354                                            "==> examining pattern def stmt: ");
 355                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 356                                             pattern_def_stmt, 0);
 357                         }
 358
 359                       stmt = pattern_def_stmt;
 360                       stmt_info = pattern_def_stmt_info;
 361                     }
                       /* The def sequence is exhausted: reset the iterator and
                          analyze the pattern stmt itself in this iteration.  */
 362                   else
 363                     {
 364                       pattern_def_si = gsi_none ();
 365                       analyze_pattern_stmt = false;
 366                     }
 367                 }
 368               else
 369                 analyze_pattern_stmt = false;
 370             }
 371
 372           if (gimple_get_lhs (stmt) == NULL_TREE
 373               /* MASK_STORE has no lhs, but is ok.  */
 374               && (!is_gimple_call (stmt)
 375                   || !gimple_call_internal_p (stmt)
 376                   || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
 377             {
 378               if (is_gimple_call (stmt))
 379                 {
 380                   /* Ignore calls with no lhs.  These must be calls to
 381                      #pragma omp simd functions, and what vectorization factor
 382                      it really needs can't be determined until
 383                      vectorizable_simd_clone_call.  */
 384                   if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 385                     {
 386                       pattern_def_seq = NULL;
 387                       gsi_next (&si);
 388                     }
 389                   continue;
 390                 }
 391               if (dump_enabled_p ())
 392                 {
 393                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 394                                    "not vectorized: irregular stmt.");
 395                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 396                                     0);
 397                 }
 398               return false;
 399             }
 400
               /* A stmt whose type already has a vector mode is rejected.  */
 401           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 402             {
 403               if (dump_enabled_p ())
 404                 {
 405                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 406                                    "not vectorized: vector stmt in loop:");
 407                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 408                 }
 409               return false;
 410             }
 411
               /* Set below for boolean-typed assignments other than COND_EXPR;
                  such stmts do not get STMT_VINFO_VECTYPE assigned here.  */
 412           bool_result = false;
 413
 414           if (STMT_VINFO_VECTYPE (stmt_info))
 415             {
 416               /* The only case when a vectype had been already set is for stmts
 417                  that contain a dataref, or for "pattern-stmts" (stmts
 418                  generated by the vectorizer to represent/replace a certain
 419                  idiom).  */
 420               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 421                           || is_pattern_stmt_p (stmt_info)
 422                           || !gsi_end_p (pattern_def_si));
 423               vectype = STMT_VINFO_VECTYPE (stmt_info);
 424             }
 425           else
 426             {
 427               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
                   /* IFN_MASK_STORE has no lhs; the type of its call argument
                      number 3 is used as the scalar type instead.  */
 428               if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
 429                 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
 430               else
 431                 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 432
 433               /* Bool ops don't participate in vectorization factor
 434                  computation.  For comparison use compared types to
 435                  compute a factor.  */
 436               if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
 437                   && is_gimple_assign (stmt)
 438                   && gimple_assign_rhs_code (stmt) != COND_EXPR)
 439                 {
 440                   if (STMT_VINFO_RELEVANT_P (stmt_info)
 441                       || STMT_VINFO_LIVE_P (stmt_info))
 442                     mask_producers.safe_push (stmt_info);
 443                   bool_result = true;
 444
                       /* A comparison of non-boolean operands contributes to
                          VF through the type of its first operand; any other
                          boolean operation is skipped for the VF computation
                          (advancing SI unless pattern work is pending).  */
 445                   if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
 446                       == tcc_comparison
 447                       && !VECT_SCALAR_BOOLEAN_TYPE_P
 448                             (TREE_TYPE (gimple_assign_rhs1 (stmt))))
 449                     scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 450                   else
 451                     {
 452                       if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 453                         {
 454                           pattern_def_seq = NULL;
 455                           gsi_next (&si);
 456                         }
 457                       continue;
 458                     }
 459                 }
 460
 461               if (dump_enabled_p ())
 462                 {
 463                   dump_printf_loc (MSG_NOTE, vect_location,
 464                                    "get vectype for scalar type:  ");
 465                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 466                   dump_printf (MSG_NOTE, "\n");
 467                 }
 468               vectype = get_vectype_for_scalar_type (scalar_type);
 469               if (!vectype)
 470                 {
 471                   if (dump_enabled_p ())
 472                     {
 473                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 474                                        "not vectorized: unsupported "
 475                                        "data-type ");
 476                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 477                                          scalar_type);
 478                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 479                     }
 480                   return false;
 481                 }
 482
                   /* For a boolean result the vectype is left unset here; the
                      loop over mask_producers at the end of the function
                      assigns it for the stmts recorded there.  */
 483               if (!bool_result)
 484                 STMT_VINFO_VECTYPE (stmt_info) = vectype;
 485
 486               if (dump_enabled_p ())
 487                 {
 488                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 489                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 490                   dump_printf (MSG_NOTE, "\n");
 491                 }
 492             }
 493
 494           /* Don't try to compute VF from scalar types if the stmt
 495              produces a boolean vector.  Use the result vectype instead.  */
 496           if (VECTOR_BOOLEAN_TYPE_P (vectype))
 497             vf_vectype = vectype;
 498           else
 499             {
 500               /* The vectorization factor is according to the smallest
 501                  scalar type (or the largest vector size, but we only
 502                  support one vector size per loop).  */
 503               if (!bool_result)
 504                 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 505                                                              &dummy);
 506               if (dump_enabled_p ())
 507                 {
 508                   dump_printf_loc (MSG_NOTE, vect_location,
 509                                    "get vectype for scalar type:  ");
 510                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 511                   dump_printf (MSG_NOTE, "\n");
 512                 }
 513               vf_vectype = get_vectype_for_scalar_type (scalar_type);
 514             }
 515           if (!vf_vectype)
 516             {
 517               if (dump_enabled_p ())
 518                 {
 519                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 520                                    "not vectorized: unsupported data-type ");
 521                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 522                                      scalar_type);
 523                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 524                 }
 525               return false;
 526             }
 527
               /* The stmt's vectype and the vectype used for VF must have the
                  same mode size.  */
 528           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 529                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 530             {
 531               if (dump_enabled_p ())
 532                 {
 533                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 534                                    "not vectorized: different sized vector "
 535                                    "types in statement, ");
 536                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 537                                      vectype);
 538                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 539                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 540                                      vf_vectype);
 541                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 542                 }
 543               return false;
 544             }
 545
 546           if (dump_enabled_p ())
 547             {
 548               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 549               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 550               dump_printf (MSG_NOTE, "\n");
 551             }
 552
 553           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 554           if (dump_enabled_p ())
 555             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
               /* Keep the largest nunits seen so far.  */
 556           if (!vectorization_factor
 557               || (nunits > vectorization_factor))
 558             vectorization_factor = nunits;
 559
               /* Move on to the next stmt of the block only when no pattern
                  stmt and no pattern def stmt remains to be analyzed for the
                  current position.  */
 560           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 561             {
 562               pattern_def_seq = NULL;
 563               gsi_next (&si);
 564             }
 565         }
 566     }
 567
 568   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 569   if (dump_enabled_p ())
 570     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
 571                      vectorization_factor);
       /* A VF of 0 (nothing contributed) or 1 is rejected.  */
 572   if (vectorization_factor <= 1)
 573     {
 574       if (dump_enabled_p ())
 575         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 576                          "not vectorized: unsupported data-type\n");
 577       return false;
 578     }
 579   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 580
       /* Assign a vectype (mask type) to every mask producer recorded above.  */
 581   for (i = 0; i < mask_producers.length (); i++)
 582     {
 583       tree mask_type = NULL;
 584
 585       stmt = STMT_VINFO_STMT (mask_producers[i]);
 586
           /* Comparison of non-boolean operands: the mask type is derived from
              the scalar type of the first compared operand.  */
 587       if (is_gimple_assign (stmt)
 588           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
 589           && !VECT_SCALAR_BOOLEAN_TYPE_P
 590                                       (TREE_TYPE (gimple_assign_rhs1 (stmt))))
 591         {
 592           scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 593           mask_type = get_mask_type_for_scalar_type (scalar_type);
 594
 595           if (!mask_type)
 596             {
 597               if (dump_enabled_p ())
 598                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 599                                  "not vectorized: unsupported mask\n");
 600               return false;
 601             }
 602         }
           /* Otherwise derive the mask type from the vectypes reported by
              vect_is_simple_use for the stmt's SSA use operands.  All operands
              that report a vectype must agree on the number of subparts and on
              whether the vectype is a boolean vector.  */
 603       else
 604         {
 605           tree rhs;
 606           ssa_op_iter iter;
 607           gimple *def_stmt;
 608           enum vect_def_type dt;
 609
 610           FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
 611             {
 612               if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
 613                                        &def_stmt, &dt, &vectype))
 614                 {
 615                   if (dump_enabled_p ())
 616                     {
 617                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 618                                        "not vectorized: can't compute mask type "
 619                                        "for statement, ");
 620                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 621                                         0);
 622                     }
 623                   return false;
 624                 }
 625
 626               /* No vectype probably means external definition.
 627                  Allow it in case there is another operand which
 628                  allows to determine mask type.  */
 629               if (!vectype)
 630                 continue;
 631
                   /* The first operand with a vectype fixes the mask type;
                      later operands are only checked against it.  */
 632               if (!mask_type)
 633                 mask_type = vectype;
 634               else if (TYPE_VECTOR_SUBPARTS (mask_type)
 635                        != TYPE_VECTOR_SUBPARTS (vectype))
 636                 {
 637                   if (dump_enabled_p ())
 638                     {
 639                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 640                                        "not vectorized: different sized masks "
 641                                        "types in statement, ");
 642                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 643                                          mask_type);
 644                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 645                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 646                                          vectype);
 647                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 648                     }
 649                   return false;
 650                 }
 651               else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
 652                        != VECTOR_BOOLEAN_TYPE_P (vectype))
 653                 {
 654                   if (dump_enabled_p ())
 655                     {
 656                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 657                                        "not vectorized: mixed mask and "
 658                                        "nonmask vector types in statement, ");
 659                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 660                                          mask_type);
 661                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 662                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 663                                          vectype);
 664                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 665                     }
 666                   return false;
 667                 }
 668             }
 669
 670           /* We may compare boolean value loaded as vector of integers.
 671              Fix mask_type in such case.  */
 672           if (mask_type
 673               && !VECTOR_BOOLEAN_TYPE_P (mask_type)
 674               && gimple_code (stmt) == GIMPLE_ASSIGN
 675               && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
 676             mask_type = build_same_sized_truth_vector_type (mask_type);
 677         }
 678
 679       /* No mask_type should mean loop invariant predicate.
 680          This is probably a subject for optimization in
 681          if-conversion.  */
 682       if (!mask_type)
 683         {
 684           if (dump_enabled_p ())
 685             {
 686               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 687                                "not vectorized: can't compute mask type "
 688                                "for statement, ");
 689               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 690                                 0);
 691             }
 692           return false;
 693         }
 694
 695       STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
 696     }
 697
 698   return true;
 699 }
 700
 701
 702 /* Function vect_is_simple_iv_evolution.
 703
 704    FORNOW: A simple evolution of an induction variable in the loop is
 705    considered a polynomial evolution.

        ACCESS_FN and LOOP_NB are handed to evolution_part_in_loop_num and
        initial_condition_in_loop_num to obtain the step and the initial value.

        Returns false if ACCESS_FN has no evolution part in loop LOOP_NB, if
        that evolution part is itself a chrec, or if the step is not of an
        accepted form (see the check at the end of the function).  Returns true
        otherwise.

        *INIT receives an unshared copy of the initial condition and *STEP the
        evolution part.  Note that both are stored before the step is
        validated, so they are also written when the function returns false
        with "step unknown"; they are left untouched on the two early
        returns.  */
 706
 707 static bool
 708 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 709                              tree * step)
 710 {
 711   tree init_expr;
 712   tree step_expr;
 713   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 714   basic_block bb;
 715
 716   /* When there is no evolution in this loop, the evolution function
 717      is not "simple".  */
 718   if (evolution_part == NULL_TREE)
 719     return false;
 720
 721   /* When the evolution is a polynomial of degree >= 2
 722      the evolution function is not "simple".  */
 723   if (tree_is_chrec (evolution_part))
 724     return false;
 725
 726   step_expr = evolution_part;
 727   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 728
 729   if (dump_enabled_p ())
 730     {
 731       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 732       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 733       dump_printf (MSG_NOTE, ",  init: ");
 734       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 735       dump_printf (MSG_NOTE, "\n");
 736     }
 737
       /* The outputs are stored before the step is validated below.  */
 738   *init = init_expr;
 739   *step = step_expr;
 740
       /* The step is accepted when it is one of:
            - an INTEGER_CST;
            - an SSA_NAME whose defining stmt either has no basic block or has
              one that is not inside loop LOOP_NB, and whose type is integral,
              or is a scalar float type while flag_associative_math is set;
            - a REAL_CST while flag_associative_math is set.
          Anything else is reported as "step unknown".  */
 741   if (TREE_CODE (step_expr) != INTEGER_CST
 742       && (TREE_CODE (step_expr) != SSA_NAME
 743           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 744               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 745           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 746               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 747                   || !flag_associative_math)))
 748       && (TREE_CODE (step_expr) != REAL_CST
 749           || !flag_associative_math))
 750     {
 751       if (dump_enabled_p ())
 752         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 753                          "step unknown.\n");
 754       return false;
 755     }
 756
 757   return true;
 758 }
 759
 760 /* Function vect_analyze_scalar_cycles_1.
 761
 762    Examine the cross iteration def-use cycles of scalar variables
 763    in LOOP.  LOOP_VINFO represents the loop that is now being
 764    considered for vectorization (can be LOOP, or an outer-loop
 765    enclosing LOOP).  */
 766
 767 static void
 768 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 769 {
 770   basic_block bb = loop->header;
 771   tree init, step;
 772   auto_vec<gimple *, 64> worklist;
 773   gphi_iterator gsi;
 774   bool double_reduc;
 775
 776   if (dump_enabled_p ())
 777     dump_printf_loc (MSG_NOTE, vect_location,
 778                      "=== vect_analyze_scalar_cycles ===\n");
 779
 780   /* First - identify all inductions.  Reduction detection assumes that all the
 781      inductions have been identified, therefore, this order must not be
 782      changed.  */
 783   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 784     {
 785       gphi *phi = gsi.phi ();
 786       tree access_fn = NULL;
 787       tree def = PHI_RESULT (phi);
 788       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 789
 790       if (dump_enabled_p ())
 791         {
 792           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 793           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 794         }
 795
 796       /* Skip virtual phi's.  The data dependences that are associated with
 797          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 798       if (virtual_operand_p (def))
 799         continue;
 800
 801       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 802
 803       /* Analyze the evolution function.  */
 804       access_fn = analyze_scalar_evolution (loop, def);
 805       if (access_fn)
 806         {
 807           STRIP_NOPS (access_fn);
 808           if (dump_enabled_p ())
 809             {
 810               dump_printf_loc (MSG_NOTE, vect_location,
 811                                "Access function of PHI: ");
 812               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 813               dump_printf (MSG_NOTE, "\n");
 814             }
 815           STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 816             = initial_condition_in_loop_num (access_fn, loop->num);
 817           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 818             = evolution_part_in_loop_num (access_fn, loop->num);
 819         }
 820
 821       if (!access_fn
 822           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 823           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 824               && TREE_CODE (step) != INTEGER_CST))
 825         {
 826           worklist.safe_push (phi);
 827           continue;
 828         }
 829
 830       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 831                   != NULL_TREE);
 832       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 833
 834       if (dump_enabled_p ())
 835         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 836       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 837     }
 838
 839
 840   /* Second - identify all reductions and nested cycles.  */
 841   while (worklist.length () > 0)
 842     {
 843       gimple *phi = worklist.pop ();
 844       tree def = PHI_RESULT (phi);
 845       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 846       gimple *reduc_stmt;
 847
 848       if (dump_enabled_p ())
 849         {
 850           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 851           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 852         }
 853
 854       gcc_assert (!virtual_operand_p (def)
 855                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 856
 857       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi,
 858                                                 &double_reduc, false);
 859       if (reduc_stmt)
 860         {
 861           if (double_reduc)
 862             {
 863               if (dump_enabled_p ())
 864                 dump_printf_loc (MSG_NOTE, vect_location,
 865                                  "Detected double reduction.\n");
 866
 867               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 868               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 869                                                     vect_double_reduction_def;
 870             }
 871           else
 872             {
 873               if (loop != LOOP_VINFO_LOOP (loop_vinfo))
 874                 {
 875                   if (dump_enabled_p ())
 876                     dump_printf_loc (MSG_NOTE, vect_location,
 877                                      "Detected vectorizable nested cycle.\n");
 878
 879                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 880                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 881                                                              vect_nested_cycle;
 882                 }
 883               else
 884                 {
 885                   if (dump_enabled_p ())
 886                     dump_printf_loc (MSG_NOTE, vect_location,
 887                                      "Detected reduction.\n");
 888
 889                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 890                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 891                                                            vect_reduction_def;
 892                   /* Store the reduction cycles for possible vectorization in
 893                      loop-aware SLP if it was not detected as reduction
 894                      chain.  */
 895                   if (! GROUP_FIRST_ELEMENT (vinfo_for_stmt (reduc_stmt)))
 896                     LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 897                 }
 898             }
 899         }
 900       else
 901         if (dump_enabled_p ())
 902           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 903                            "Unknown def-use cycle pattern.\n");
 904     }
 905 }
 906
 907
 908 /* Function vect_analyze_scalar_cycles.
 909
 910    Examine the cross iteration def-use cycles of scalar variables, by
 911    analyzing the loop-header PHIs of scalar variables.  Classify each
 912    cycle as one of the following: invariant, induction, reduction, unknown.
 913    We do that for the loop represented by LOOP_VINFO, and also to its
 914    inner-loop, if exists.
 915    Examples for scalar cycles:
 916
 917    Example1: reduction:
 918
 919               loop1:
 920               for (i=0; i<N; i++)
 921                  sum += a[i];
 922
 923    Example2: induction:
 924
 925               loop2:
 926               for (i=0; i<N; i++)
 927                  a[i] = i;  */
 928
 929 static void
 930 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 931 {
 932   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 933
 934   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 935
 936   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 937      Reductions in such inner-loop therefore have different properties than
 938      the reductions in the nest that gets vectorized:
 939      1. When vectorized, they are executed in the same order as in the original
 940         scalar loop, so we can't change the order of computation when
 941         vectorizing them.
 942      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 943         current checks are too strict.  */
 944
 945   if (loop->inner)
 946     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 947 }
 948
 949 /* Transfer group and reduction information from STMT to its pattern stmt.  */
 950
 951 static void
 952 vect_fixup_reduc_chain (gimple *stmt)
 953 {
 954   gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 955   gimple *stmtp;
 956   gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
 957               && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
 958   GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
 959   do
 960     {
 961       stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 962       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
 963       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
 964       if (stmt)
 965         GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
 966           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 967     }
 968   while (stmt);
 969   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
 970 }
 971
 972 /* Fixup scalar cycles that now have their stmts detected as patterns.  */
 973
 974 static void
 975 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
 976 {
 977   gimple *first;
 978   unsigned i;
 979
 980   FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
 981     if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
 982       {
 983         gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
 984         while (next)
 985           {
 986             if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
 987               break;
 988             next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
 989           }
 990         /* If not all stmt in the chain are patterns try to handle
 991            the chain without patterns.  */
 992         if (! next)
 993           {
 994             vect_fixup_reduc_chain (first);
 995             LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
 996               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
 997           }
 998       }
 999 }
1000
1001 /* Function vect_get_loop_niters.
1002
1003    Determine how many iterations the loop is executed and place it
1004    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
1005    in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
1006    niter information holds in ASSUMPTIONS.
1007
1008    Return the loop exit condition.  */
1009
1010
1011 static gcond *
1012 vect_get_loop_niters (struct loop *loop, tree *assumptions,
1013                       tree *number_of_iterations, tree *number_of_iterationsm1)
1014 {
1015   edge exit = single_exit (loop);
1016   struct tree_niter_desc niter_desc;
1017   tree niter_assumptions, niter, may_be_zero;
1018   gcond *cond = get_loop_exit_condition (loop);
1019
1020   *assumptions = boolean_true_node;
1021   *number_of_iterationsm1 = chrec_dont_know;
1022   *number_of_iterations = chrec_dont_know;
1023   if (dump_enabled_p ())
1024     dump_printf_loc (MSG_NOTE, vect_location,
1025                      "=== get_loop_niters ===\n");
1026
1027   if (!exit)
1028     return cond;
1029
1030   niter = chrec_dont_know;
1031   may_be_zero = NULL_TREE;
1032   niter_assumptions = boolean_true_node;
1033   if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
1034       || chrec_contains_undetermined (niter_desc.niter))
1035     return cond;
1036
1037   niter_assumptions = niter_desc.assumptions;
1038   may_be_zero = niter_desc.may_be_zero;
1039   niter = niter_desc.niter;
1040
1041   if (may_be_zero && integer_zerop (may_be_zero))
1042     may_be_zero = NULL_TREE;
1043
1044   if (may_be_zero)
1045     {
1046       if (COMPARISON_CLASS_P (may_be_zero))
1047         {
1048           /* Try to combine may_be_zero with assumptions, this can simplify
1049              computation of niter expression.  */
1050           if (niter_assumptions && !integer_nonzerop (niter_assumptions))
1051             niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1052                                              niter_assumptions,
1053                                              fold_build1 (TRUTH_NOT_EXPR,
1054                                                           boolean_type_node,
1055                                                           may_be_zero));
1056           else
1057             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
1058                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1059
1060           may_be_zero = NULL_TREE;
1061         }
1062       else if (integer_nonzerop (may_be_zero))
1063         {
1064           *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
1065           *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
1066           return cond;
1067         }
1068       else
1069         return cond;
1070     }
1071
1072   *assumptions = niter_assumptions;
1073   *number_of_iterationsm1 = niter;
1074
1075   /* We want the number of loop header executions which is the number
1076      of latch executions plus one.
1077      ???  For UINT_MAX latch executions this number overflows to zero
1078      for loops like do { n++; } while (n != 0);  */
1079   if (niter && !chrec_contains_undetermined (niter))
1080     niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
1081                           build_int_cst (TREE_TYPE (niter), 1));
1082   *number_of_iterations = niter;
1083
1084   return cond;
1085 }
1086
1087 /* Function bb_in_loop_p
1088
1089    Used as predicate for dfs order traversal of the loop bbs.  */
1090
1091 static bool
1092 bb_in_loop_p (const_basic_block bb, const void *data)
1093 {
1094   const struct loop *const loop = (const struct loop *)data;
1095   if (flow_bb_inside_loop_p (loop, bb))
1096     return true;
1097   return false;
1098 }
1099
1100
1101 /* Function new_loop_vec_info.
1102
1103    Create and initialize a new loop_vec_info struct for LOOP, as well as
1104    stmt_vec_info structs for all the stmts in LOOP.  */
1105
1106 static loop_vec_info
1107 new_loop_vec_info (struct loop *loop)
1108 {
1109   loop_vec_info res;
1110   basic_block *bbs;
1111   gimple_stmt_iterator si;
1112   unsigned int i, nbbs;
1113
1114   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1115   res->kind = vec_info::loop;
1116   LOOP_VINFO_LOOP (res) = loop;
1117
1118   bbs = get_loop_body (loop);
1119
1120   /* Create/Update stmt_info for all stmts in the loop.  */
1121   for (i = 0; i < loop->num_nodes; i++)
1122     {
1123       basic_block bb = bbs[i];
1124
1125       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1126         {
1127           gimple *phi = gsi_stmt (si);
1128           gimple_set_uid (phi, 0);
1129           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1130         }
1131
1132       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1133         {
1134           gimple *stmt = gsi_stmt (si);
1135           gimple_set_uid (stmt, 0);
1136           set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1137         }
1138     }
1139
1140   /* CHECKME: We want to visit all BBs before their successors (except for
1141      latch blocks, for which this assertion wouldn't hold).  In the simple
1142      case of the loop forms we allow, a dfs order of the BBs would the same
1143      as reversed postorder traversal, so we are safe.  */
1144
1145    free (bbs);
1146    bbs = XCNEWVEC (basic_block, loop->num_nodes);
1147    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1148                               bbs, loop->num_nodes, loop);
1149    gcc_assert (nbbs == loop->num_nodes);
1150
1151   LOOP_VINFO_BBS (res) = bbs;
1152   LOOP_VINFO_NITERSM1 (res) = NULL;
1153   LOOP_VINFO_NITERS (res) = NULL;
1154   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1155   LOOP_VINFO_NITERS_ASSUMPTIONS (res) = NULL;
1156   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1157   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1158   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1159   LOOP_VINFO_VECT_FACTOR (res) = 0;
1160   LOOP_VINFO_LOOP_NEST (res) = vNULL;
1161   LOOP_VINFO_DATAREFS (res) = vNULL;
1162   LOOP_VINFO_DDRS (res) = vNULL;
1163   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1164   LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1165   LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1166   LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1167   LOOP_VINFO_REDUCTIONS (res) = vNULL;
1168   LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1169   LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1170   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1171   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1172   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1173   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1174   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1175   LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
1176
1177   return res;
1178 }
1179
1180
1181 /* Function destroy_loop_vec_info.
1182
1183    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1184    stmts in the loop.  */
1185
1186 void
1187 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1188 {
1189   struct loop *loop;
1190   basic_block *bbs;
1191   int nbbs;
1192   gimple_stmt_iterator si;
1193   int j;
1194   vec<slp_instance> slp_instances;
1195   slp_instance instance;
1196   bool swapped;
1197
1198   if (!loop_vinfo)
1199     return;
1200
1201   loop = LOOP_VINFO_LOOP (loop_vinfo);
1202
1203   bbs = LOOP_VINFO_BBS (loop_vinfo);
1204   nbbs = clean_stmts ? loop->num_nodes : 0;
1205   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1206
1207   for (j = 0; j < nbbs; j++)
1208     {
1209       basic_block bb = bbs[j];
1210       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1211         free_stmt_vec_info (gsi_stmt (si));
1212
1213       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1214         {
1215           gimple *stmt = gsi_stmt (si);
1216
1217           /* We may have broken canonical form by moving a constant
1218              into RHS1 of a commutative op.  Fix such occurrences.  */
1219           if (swapped && is_gimple_assign (stmt))
1220             {
1221               enum tree_code code = gimple_assign_rhs_code (stmt);
1222
1223               if ((code == PLUS_EXPR
1224                    || code == POINTER_PLUS_EXPR
1225                    || code == MULT_EXPR)
1226                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1227                 swap_ssa_operands (stmt,
1228                                    gimple_assign_rhs1_ptr (stmt),
1229                                    gimple_assign_rhs2_ptr (stmt));
1230               else if (code == COND_EXPR
1231                        && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
1232                 {
1233                   tree cond_expr = gimple_assign_rhs1 (stmt);
1234                   enum tree_code cond_code = TREE_CODE (cond_expr);
1235
1236                   if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1237                     {
1238                       bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr,
1239                                                                   0));
1240                       cond_code = invert_tree_comparison (cond_code,
1241                                                           honor_nans);
1242                       if (cond_code != ERROR_MARK)
1243                         {
1244                           TREE_SET_CODE (cond_expr, cond_code);
1245                           swap_ssa_operands (stmt,
1246                                              gimple_assign_rhs2_ptr (stmt),
1247                                              gimple_assign_rhs3_ptr (stmt));
1248                         }
1249                     }
1250                 }
1251             }
1252
1253           /* Free stmt_vec_info.  */
1254           free_stmt_vec_info (stmt);
1255           gsi_next (&si);
1256         }
1257     }
1258
1259   free (LOOP_VINFO_BBS (loop_vinfo));
1260   vect_destroy_datarefs (loop_vinfo);
1261   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1262   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1263   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1264   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
1265   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1266   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1267   FOR_EACH_VEC_ELT (slp_instances, j, instance)
1268     vect_free_slp_instance (instance);
1269
1270   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1271   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1272   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1273   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1274
1275   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1276   loop_vinfo->scalar_cost_vec.release ();
1277
1278   free (loop_vinfo);
1279   loop->aux = NULL;
1280 }
1281
1282
1283 /* Calculate the cost of one scalar iteration of the loop.  */
1284 static void
1285 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1286 {
1287   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1288   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1289   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1290   int innerloop_iters, i;
1291
1292   /* Count statements in scalar loop.  Using this as scalar cost for a single
1293      iteration for now.
1294
1295      TODO: Add outer loop support.
1296
1297      TODO: Consider assigning different costs to different scalar
1298      statements.  */
1299
1300   /* FORNOW.  */
1301   innerloop_iters = 1;
1302   if (loop->inner)
1303     innerloop_iters = 50; /* FIXME */
1304
1305   for (i = 0; i < nbbs; i++)
1306     {
1307       gimple_stmt_iterator si;
1308       basic_block bb = bbs[i];
1309
1310       if (bb->loop_father == loop->inner)
1311         factor = innerloop_iters;
1312       else
1313         factor = 1;
1314
1315       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1316         {
1317           gimple *stmt = gsi_stmt (si);
1318           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1319
1320           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1321             continue;
1322
1323           /* Skip stmts that are not vectorized inside the loop.  */
1324           if (stmt_info
1325               && !STMT_VINFO_RELEVANT_P (stmt_info)
1326               && (!STMT_VINFO_LIVE_P (stmt_info)
1327                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1328               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1329             continue;
1330
1331           vect_cost_for_stmt kind;
1332           if (STMT_VINFO_DATA_REF (stmt_info))
1333             {
1334               if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1335                kind = scalar_load;
1336              else
1337                kind = scalar_store;
1338             }
1339           else
1340             kind = scalar_stmt;
1341
1342           scalar_single_iter_cost
1343             += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1344                                  factor, kind, stmt_info, 0, vect_prologue);
1345         }
1346     }
1347   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1348     = scalar_single_iter_cost;
1349 }
1350
1351
1352 /* Function vect_analyze_loop_form_1.
1353
1354    Verify that certain CFG restrictions hold, including:
1355    - the loop has a pre-header
1356    - the loop has a single entry and exit
1357    - the loop exit condition is simple enough
1358    - the number of iterations can be analyzed, i.e, a countable loop.  The
1359      niter could be analyzed under some assumptions.  */
1360
1361 bool
1362 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1363                           tree *assumptions, tree *number_of_iterationsm1,
1364                           tree *number_of_iterations, gcond **inner_loop_cond)
1365 {
1366   if (dump_enabled_p ())
1367     dump_printf_loc (MSG_NOTE, vect_location,
1368                      "=== vect_analyze_loop_form ===\n");
1369
1370   /* Different restrictions apply when we are considering an inner-most loop,
1371      vs. an outer (nested) loop.
1372      (FORNOW. May want to relax some of these restrictions in the future).  */
1373
1374   if (!loop->inner)
1375     {
1376       /* Inner-most loop.  We currently require that the number of BBs is
1377          exactly 2 (the header and latch).  Vectorizable inner-most loops
1378          look like this:
1379
1380                         (pre-header)
1381                            |
1382                           header <--------+
1383                            | |            |
1384                            | +--> latch --+
1385                            |
1386                         (exit-bb)  */
1387
1388       if (loop->num_nodes != 2)
1389         {
1390           if (dump_enabled_p ())
1391             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                              "not vectorized: control flow in loop.\n");
1393           return false;
1394         }
1395
1396       if (empty_block_p (loop->header))
1397         {
1398           if (dump_enabled_p ())
1399             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1400                              "not vectorized: empty loop.\n");
1401           return false;
1402         }
1403     }
1404   else
1405     {
1406       struct loop *innerloop = loop->inner;
1407       edge entryedge;
1408
1409       /* Nested loop. We currently require that the loop is doubly-nested,
1410          contains a single inner loop, and the number of BBs is exactly 5.
1411          Vectorizable outer-loops look like this:
1412
1413                         (pre-header)
1414                            |
1415                           header <---+
1416                            |         |
1417                           inner-loop |
1418                            |         |
1419                           tail ------+
1420                            |
1421                         (exit-bb)
1422
1423          The inner-loop has the properties expected of inner-most loops
1424          as described above.  */
1425
1426       if ((loop->inner)->inner || (loop->inner)->next)
1427         {
1428           if (dump_enabled_p ())
1429             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1430                              "not vectorized: multiple nested loops.\n");
1431           return false;
1432         }
1433
1434       if (loop->num_nodes != 5)
1435         {
1436           if (dump_enabled_p ())
1437             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1438                              "not vectorized: control flow in loop.\n");
1439           return false;
1440         }
1441
1442       entryedge = loop_preheader_edge (innerloop);
1443       if (entryedge->src != loop->header
1444           || !single_exit (innerloop)
1445           || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1446         {
1447           if (dump_enabled_p ())
1448             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1449                              "not vectorized: unsupported outerloop form.\n");
1450           return false;
1451         }
1452
1453       /* Analyze the inner-loop.  */
1454       tree inner_niterm1, inner_niter, inner_assumptions;
1455       if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1456                                       &inner_assumptions, &inner_niterm1,
1457                                       &inner_niter, NULL)
1458           /* Don't support analyzing niter under assumptions for inner
1459              loop.  */
1460           || !integer_onep (inner_assumptions))
1461         {
1462           if (dump_enabled_p ())
1463             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1464                              "not vectorized: Bad inner loop.\n");
1465           return false;
1466         }
1467
1468       if (!expr_invariant_in_loop_p (loop, inner_niter))
1469         {
1470           if (dump_enabled_p ())
1471             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1472                              "not vectorized: inner-loop count not"
1473                              " invariant.\n");
1474           return false;
1475         }
1476
1477       if (dump_enabled_p ())
1478         dump_printf_loc (MSG_NOTE, vect_location,
1479                          "Considering outer-loop vectorization.\n");
1480     }
1481
1482   if (!single_exit (loop)
1483       || EDGE_COUNT (loop->header->preds) != 2)
1484     {
1485       if (dump_enabled_p ())
1486         {
1487           if (!single_exit (loop))
1488             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1489                              "not vectorized: multiple exits.\n");
1490           else if (EDGE_COUNT (loop->header->preds) != 2)
1491             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1492                              "not vectorized: too many incoming edges.\n");
1493         }
1494       return false;
1495     }
1496
1497   /* We assume that the loop exit condition is at the end of the loop. i.e,
1498      that the loop is represented as a do-while (with a proper if-guard
1499      before the loop if needed), where the loop header contains all the
1500      executable statements, and the latch is empty.  */
1501   if (!empty_block_p (loop->latch)
1502       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1503     {
1504       if (dump_enabled_p ())
1505         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1506                          "not vectorized: latch block not empty.\n");
1507       return false;
1508     }
1509
1510   /* Make sure the exit is not abnormal.  */
1511   edge e = single_exit (loop);
1512   if (e->flags & EDGE_ABNORMAL)
1513     {
1514       if (dump_enabled_p ())
1515         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1516                          "not vectorized: abnormal loop exit edge.\n");
1517       return false;
1518     }
1519
1520   *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
1521                                      number_of_iterationsm1);
1522   if (!*loop_cond)
1523     {
1524       if (dump_enabled_p ())
1525         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1526                          "not vectorized: complicated exit condition.\n");
1527       return false;
1528     }
1529
1530   if (integer_zerop (*assumptions)
1531       || !*number_of_iterations
1532       || chrec_contains_undetermined (*number_of_iterations))
1533     {
1534       if (dump_enabled_p ())
1535         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1536                          "not vectorized: number of iterations cannot be "
1537                          "computed.\n");
1538       return false;
1539     }
1540
1541   if (integer_zerop (*number_of_iterations))
1542     {
1543       if (dump_enabled_p ())
1544         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1545                          "not vectorized: number of iterations = 0.\n");
1546       return false;
1547     }
1548
1549   return true;
1550 }
1551
1552 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form.

        Runs vect_analyze_loop_form_1 on LOOP and returns NULL when that
        check fails.  Otherwise a new loop_vec_info is created for LOOP and
        filled in with:
          - the two iteration-count trees reported by the form check
            (stored as NITERSM1, NITERS and NITERS_UNCHANGED);
          - the ASSUMPTIONS tree, but only when integer_onep rejects it.
        The loop exit condition, and the inner-loop exit condition when one
        was reported, are tagged loop_exit_ctrl_vec_info_type.  Finally the
        new loop_vec_info is stored in LOOP->aux (asserted to be unset just
        before the store) and returned.  */
1553
1554 loop_vec_info
1555 vect_analyze_loop_form (struct loop *loop)
1556 {
1557   tree assumptions, number_of_iterations, number_of_iterationsm1;
1558   gcond *loop_cond, *inner_loop_cond = NULL;
1559
       /* All outputs except INNER_LOOP_COND are uninitialized here and are
          only read after the form check has succeeded.  */
1560   if (! vect_analyze_loop_form_1 (loop, &loop_cond,
1561                                   &assumptions, &number_of_iterationsm1,
1562                                   &number_of_iterations, &inner_loop_cond))
1563     return NULL;
1564
1565   loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1566   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
       /* NITERS and NITERS_UNCHANGED start out as the same tree.  */
1567   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1568   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1569   if (!integer_onep (assumptions))
1570     {
1571       /* We consider to vectorize this loop by versioning it under
1572          some assumptions.  In order to do this, we need to clear
1573          existing information computed by scev and niter analyzer.  */
1574       scev_reset_htab ();
1575       free_numbers_of_iterations_estimates (loop);
1576       /* Also set flag for this loop so that following scev and niter
1577          analysis are done under the assumptions.  */
1578       loop_constraint_set (loop, LOOP_C_FINITE);
1579       /* Also record the assumptions for versioning.  */
1580       LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
1581     }
1582
       /* Dump only: print the iteration-count expression when
          LOOP_VINFO_NITERS_KNOWN_P is false for it.  */
1583   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1584     {
1585       if (dump_enabled_p ())
1586         {
1587           dump_printf_loc (MSG_NOTE, vect_location,
1588                            "Symbolic number of iterations is ");
1589           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1590           dump_printf (MSG_NOTE, "\n");
1591         }
1592     }
1593
       /* Tag the exit condition stmt(s) as loop-exit control.  */
1594   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1595   if (inner_loop_cond)
1596     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1597       = loop_exit_ctrl_vec_info_type;
1598
       /* LOOP must not already carry auxiliary data; attach the new info.  */
1599   gcc_assert (!loop->aux);
1600   loop->aux = loop_vinfo;
1601   return loop_vinfo;
1602 }
1603
1604
1605
1606 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
1607    statements, update the vectorization factor.

        Walks the statements of every basic block in LOOP_VINFO_BBS.  If no
        statement is found that is relevant (or a vectorizable cycle def)
        and at the same time not PURE_SLP_STMT, the factor becomes
        LOOP_VINFO_SLP_UNROLLING_FACTOR; otherwise it becomes the least
        common multiple of the current factor and that unrolling factor.
        The result is written back to LOOP_VINFO_VECT_FACTOR (LOOP_VINFO).
        The incoming factor is asserted to be nonzero.  */
1608
1609 static void
1610 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1611 {
1612   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1613   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1614   int nbbs = loop->num_nodes;
1615   unsigned int vectorization_factor;
1616   int i;
1617
1618   if (dump_enabled_p ())
1619     dump_printf_loc (MSG_NOTE, vect_location,
1620                      "=== vect_update_vf_for_slp ===\n");
1621
1622   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1623   gcc_assert (vectorization_factor != 0);
1624
1625   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1626      vectorization factor of the loop is the unrolling factor required by
1627      the SLP instances.  If that unrolling factor is 1, we say, that we
1628      perform pure SLP on loop - cross iteration parallelism is not
1629      exploited.  */
1630   bool only_slp_in_loop = true;
1631   for (i = 0; i < nbbs; i++)
1632     {
1633       basic_block bb = bbs[i];
1634       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1635            gsi_next (&si))
1636         {
1637           gimple *stmt = gsi_stmt (si);
1638           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
               /* When STMT is flagged STMT_VINFO_IN_PATTERN_P and has a
                  related stmt, classify that related stmt instead.  */
1639           if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1640               && STMT_VINFO_RELATED_STMT (stmt_info))
1641             {
1642               stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1643               stmt_info = vinfo_for_stmt (stmt);
1644             }
1645           if ((STMT_VINFO_RELEVANT_P (stmt_info)
1646                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1647               && !PURE_SLP_STMT (stmt_info))
1648             /* STMT needs both SLP and loop-based vectorization.  */
1649             only_slp_in_loop = false;
1650         }
1651     }
1652
       /* NOTE(review): the two dump_printf_loc calls below are not guarded
          by dump_enabled_p (), unlike the other dumps in this function --
          confirm whether that is intentional.  */
1653   if (only_slp_in_loop)
1654     {
1655       dump_printf_loc (MSG_NOTE, vect_location,
1656                        "Loop contains only SLP stmts\n");
1657       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1658     }
1659   else
1660     {
1661       dump_printf_loc (MSG_NOTE, vect_location,
1662                        "Loop contains SLP and non-SLP stmts\n");
1663       vectorization_factor
1664         = least_common_multiple (vectorization_factor,
1665                                  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1666     }
1667
1668   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
       /* NOTE(review): VECTORIZATION_FACTOR is declared unsigned int but is
          printed with %d -- consider whether %u is wanted.  */
1669   if (dump_enabled_p ())
1670     dump_printf_loc (MSG_NOTE, vect_location,
1671                      "Updating vectorization factor to %d\n",
1672                      vectorization_factor);
1673 }
1674
1675 /* Function vect_analyze_loop_operations.
1676
1677    Scan the loop stmts and make sure they are all vectorizable.

        For each basic block in LOOP_VINFO_BBS (LOOP_VINFO) this first
        examines the block's PHIs (PHIs with a virtual result are skipped)
        and then calls vect_analyze_stmt on every statement that is not a
        clobber.

        Returns false as soon as one of the checks fails, and also when
        nothing set NEED_TO_VECTORIZE during the scan; returns true
        otherwise.  */
1678
1679 static bool
1680 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1681 {
1682   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1683   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1684   int nbbs = loop->num_nodes;
1685   int i;
1686   stmt_vec_info stmt_info;
1687   bool need_to_vectorize = false;
1688   bool ok;
1689
1690   if (dump_enabled_p ())
1691     dump_printf_loc (MSG_NOTE, vect_location,
1692                      "=== vect_analyze_loop_operations ===\n");
1693
1694   for (i = 0; i < nbbs; i++)
1695     {
1696       basic_block bb = bbs[i];
1697
1698       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1699            gsi_next (&si))
1700         {
1701           gphi *phi = si.phi ();
               /* OK is reset for every PHI; only the vectorizable_* calls
                  further down can clear it.  */
1702           ok = true;
1703
1704           stmt_info = vinfo_for_stmt (phi);
1705           if (dump_enabled_p ())
1706             {
1707               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1708               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1709             }
1710           if (virtual_operand_p (gimple_phi_result (phi)))
1711             continue;
1712
1713           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1714              (i.e., a phi in the tail of the outer-loop).  */
1715           if (! is_loop_header_bb_p (bb))
1716             {
1717               /* FORNOW: we currently don't support the case that these phis
1718                  are not used in the outerloop (unless it is double reduction,
1719                  i.e., this phi is vect_reduction_def), cause this case
1720                  requires to actually do something here.  */
1721               if (STMT_VINFO_LIVE_P (stmt_info)
1722                   && STMT_VINFO_DEF_TYPE (stmt_info)
1723                      != vect_double_reduction_def)
1724                 {
1725                   if (dump_enabled_p ())
1726                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1727                                      "Unsupported loop-closed phi in "
1728                                      "outer-loop.\n");
1729                   return false;
1730                 }
1731
1732               /* If PHI is used in the outer loop, we check that its operand
1733                  is defined in the inner loop.  */
1734               if (STMT_VINFO_RELEVANT_P (stmt_info))
1735                 {
1736                   tree phi_op;
1737                   gimple *op_def_stmt;
1738
                       /* Each failure below returns false without emitting a
                          dump message.  The PHI must have exactly one
                          argument ...  */
1739                   if (gimple_phi_num_args (phi) != 1)
1740                     return false;
1741
                       /* ... that argument must be an SSA name ...  */
1742                   phi_op = PHI_ARG_DEF (phi, 0);
1743                   if (TREE_CODE (phi_op) != SSA_NAME)
1744                     return false;
1745
                       /* ... whose defining stmt is not a nop, lies inside
                          LOOP and has a stmt_vec_info ...  */
1746                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1747                   if (gimple_nop_p (op_def_stmt)
1748                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1749                       || !vinfo_for_stmt (op_def_stmt))
1750                     return false;
1751
                       /* ... and whose relevance is vect_used_in_outer or
                          vect_used_in_outer_by_reduction.  */
1752                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1753                         != vect_used_in_outer
1754                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1755                            != vect_used_in_outer_by_reduction)
1756                     return false;
1757                 }
1758
                   /* Nothing further is checked for a non-header PHI.  */
1759               continue;
1760             }
1761
               /* NOTE(review): STMT_INFO has already been handed to the
                  STMT_VINFO_* accessors in the non-header branch above, so
                  this assert only covers loop-header PHIs.  */
1762           gcc_assert (stmt_info);
1763
1764           if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1765                || STMT_VINFO_LIVE_P (stmt_info))
1766               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1767             {
1768               /* A scalar-dependence cycle that we don't support.  */
1769               if (dump_enabled_p ())
1770                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1771                                  "not vectorized: scalar dependence cycle.\n");
1772               return false;
1773             }
1774
               /* A relevant PHI means there is something to vectorize.
                  Induction PHIs, and reduction or nested-cycle PHIs, are
                  checked by the matching vectorizable_* routine unless they
                  are pure SLP; in every other case OK keeps its value.  */
1775           if (STMT_VINFO_RELEVANT_P (stmt_info))
1776             {
1777               need_to_vectorize = true;
1778               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
1779                   && ! PURE_SLP_STMT (stmt_info))
1780                 ok = vectorizable_induction (phi, NULL, NULL, NULL);
1781               else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1782                         || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
1783                        && ! PURE_SLP_STMT (stmt_info))
1784                 ok = vectorizable_reduction (phi, NULL, NULL, NULL);
1785             }
1786
               /* A PHI flagged STMT_VINFO_LIVE_P must additionally pass
                  vectorizable_live_operation.  */
1787           if (ok && STMT_VINFO_LIVE_P (stmt_info))
1788             ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
1789
1790           if (!ok)
1791             {
1792               if (dump_enabled_p ())
1793                 {
1794                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1795                                    "not vectorized: relevant phi not "
1796                                    "supported: ");
1797                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1798                 }
1799               return false;
1800             }
1801         }
1802
           /* Now the ordinary statements of BB; clobbers are skipped.
              vect_analyze_stmt may set NEED_TO_VECTORIZE through the
              pointer it is given.  */
1803       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1804            gsi_next (&si))
1805         {
1806           gimple *stmt = gsi_stmt (si);
1807           if (!gimple_clobber_p (stmt)
1808               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1809             return false;
1810         }
1811     } /* bbs */
1812
1813   /* All operations in the loop are either irrelevant (deal with loop
1814      control, or dead), or only used outside the loop and can be moved
1815      out of the loop (e.g. invariants, inductions).  The loop can be
1816      optimized away by scalar optimizations.  We're better off not
1817      touching this loop.  */
1818   if (!need_to_vectorize)
1819     {
1820       if (dump_enabled_p ())
1821         dump_printf_loc (MSG_NOTE, vect_location,
1822                          "All the computation can be taken out of the loop.\n");
1823       if (dump_enabled_p ())
1824         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1825                          "not vectorized: redundant loop. no profit to "
1826                          "vectorize.\n");
1827       return false;
1828     }
1829
1830   return true;
1831 }
1832
1833
1834 /* Function vect_analyze_loop_2.
1835
1836    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1837    for it.  The different analyses will record information in the
1838    loop_vec_info struct.  */
1839 static bool
1840 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1841 {
1842   bool ok;
1843   int max_vf = MAX_VECTORIZATION_FACTOR;
1844   int min_vf = 2;
1845   unsigned int n_stmts = 0;
1846
1847   /* The first group of checks is independent of the vector size.  */
1848   fatal = true;
1849
1850   /* Find all data references in the loop (which correspond to vdefs/vuses)
1851      and analyze their evolution in the loop.  */
1852
1853   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1854
1855   loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1856   if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1857     {
1858       if (dump_enabled_p ())
1859         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860                          "not vectorized: loop nest containing two "
1861                          "or more consecutive inner loops cannot be "
1862                          "vectorized\n");
1863       return false;
1864     }
1865
1866   for (unsigned i = 0; i < loop->num_nodes; i++)
1867     for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1868          !gsi_end_p (gsi); gsi_next (&gsi))
1869       {
1870         gimple *stmt = gsi_stmt (gsi);
1871         if (is_gimple_debug (stmt))
1872           continue;
1873         ++n_stmts;
1874         if (!find_data_references_in_stmt (loop, stmt,
1875                                            &LOOP_VINFO_DATAREFS (loop_vinfo)))
1876           {
1877             if (is_gimple_call (stmt) && loop->safelen)
1878               {
1879                 tree fndecl = gimple_call_fndecl (stmt), op;
1880                 if (fndecl != NULL_TREE)
1881                   {
1882                     cgraph_node *node = cgraph_node::get (fndecl);
1883                     if (node != NULL && node->simd_clones != NULL)
1884                       {
1885                         unsigned int j, n = gimple_call_num_args (stmt);
1886                         for (j = 0; j < n; j++)
1887                           {
1888                             op = gimple_call_arg (stmt, j);
1889                             if (DECL_P (op)
1890                                 || (REFERENCE_CLASS_P (op)
1891                                     && get_base_address (op)))
1892                               break;
1893                           }
1894                         op = gimple_call_lhs (stmt);
1895                         /* Ignore #pragma omp declare simd functions
1896                            if they don't have data references in the
1897                            call stmt itself.  */
1898                         if (j == n
1899                             && !(op
1900                                  && (DECL_P (op)
1901                                      || (REFERENCE_CLASS_P (op)
1902                                          && get_base_address (op)))))
1903                           continue;
1904                       }
1905                   }
1906               }
1907             if (dump_enabled_p ())
1908               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1909                                "not vectorized: loop contains function "
1910                                "calls or data references that cannot "
1911                                "be analyzed\n");
1912             return false;
1913           }
1914       }
1915
1916   /* Analyze the data references and also adjust the minimal
1917      vectorization factor according to the loads and stores.  */
1918
1919   ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1920   if (!ok)
1921     {
1922       if (dump_enabled_p ())
1923         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1924                          "bad data references.\n");
1925       return false;
1926     }
1927
1928   /* Classify all cross-iteration scalar data-flow cycles.
1929      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1930   vect_analyze_scalar_cycles (loop_vinfo);
1931
1932   vect_pattern_recog (loop_vinfo);
1933
1934   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1935
1936   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1937      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1938
1939   ok = vect_analyze_data_ref_accesses (loop_vinfo);
1940   if (!ok)
1941     {
1942       if (dump_enabled_p ())
1943         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944                          "bad data access.\n");
1945       return false;
1946     }
1947
1948   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1949
1950   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1951   if (!ok)
1952     {
1953       if (dump_enabled_p ())
1954         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1955                          "unexpected pattern.\n");
1956       return false;
1957     }
1958
1959   /* While the rest of the analysis below depends on it in some way.  */
1960   fatal = false;
1961
1962   /* Analyze data dependences between the data-refs in the loop
1963      and adjust the maximum vectorization factor according to
1964      the dependences.
1965      FORNOW: fail at the first data dependence that we encounter.  */
1966
1967   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1968   if (!ok
1969       || max_vf < min_vf)
1970     {
1971       if (dump_enabled_p ())
1972             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1973                              "bad data dependence.\n");
1974       return false;
1975     }
1976
1977   ok = vect_determine_vectorization_factor (loop_vinfo);
1978   if (!ok)
1979     {
1980       if (dump_enabled_p ())
1981         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1982                          "can't determine vectorization factor.\n");
1983       return false;
1984     }
1985   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1986     {
1987       if (dump_enabled_p ())
1988         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1989                          "bad data dependence.\n");
1990       return false;
1991     }
1992
1993   /* Compute the scalar iteration cost.  */
1994   vect_compute_single_scalar_iteration_cost (loop_vinfo);
1995
1996   int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1997   HOST_WIDE_INT estimated_niter;
1998   unsigned th;
1999   int min_scalar_loop_bound;
2000
2001   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
2002   ok = vect_analyze_slp (loop_vinfo, n_stmts);
2003   if (!ok)
2004     return false;
2005
2006   /* If there are any SLP instances mark them as pure_slp.  */
2007   bool slp = vect_make_slp_decision (loop_vinfo);
2008   if (slp)
2009     {
2010       /* Find stmts that need to be both vectorized and SLPed.  */
2011       vect_detect_hybrid_slp (loop_vinfo);
2012
2013       /* Update the vectorization factor based on the SLP decision.  */
2014       vect_update_vf_for_slp (loop_vinfo);
2015     }
2016
2017   /* This is the point where we can re-start analysis with SLP forced off.  */
2018 start_over:
2019
2020   /* Now the vectorization factor is final.  */
2021   unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2022   gcc_assert (vectorization_factor != 0);
2023
2024   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
2025     dump_printf_loc (MSG_NOTE, vect_location,
2026                      "vectorization_factor = %d, niters = "
2027                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
2028                      LOOP_VINFO_INT_NITERS (loop_vinfo));
2029
2030   HOST_WIDE_INT max_niter
2031     = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2032   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2033        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
2034       || (max_niter != -1
2035           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
2036     {
2037       if (dump_enabled_p ())
2038         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2039                          "not vectorized: iteration count smaller than "
2040                          "vectorization factor.\n");
2041       return false;
2042     }
2043
2044   /* Analyze the alignment of the data-refs in the loop.
2045      Fail if a data reference is found that cannot be vectorized.  */
2046
2047   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2048   if (!ok)
2049     {
2050       if (dump_enabled_p ())
2051         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2052                          "bad data alignment.\n");
2053       return false;
2054     }
2055
2056   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2057      It is important to call pruning after vect_analyze_data_ref_accesses,
2058      since we use grouping information gathered by interleaving analysis.  */
2059   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2060   if (!ok)
2061     return false;
2062
2063   /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2064      vectorization.  */
2065   if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2066     {
2067     /* This pass will decide on using loop versioning and/or loop peeling in
2068        order to enhance the alignment of data references in the loop.  */
2069     ok = vect_enhance_data_refs_alignment (loop_vinfo);
2070     if (!ok)
2071       {
2072         if (dump_enabled_p ())
2073           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2074                            "bad data alignment.\n");
2075         return false;
2076       }
2077     }
2078
2079   if (slp)
2080     {
2081       /* Analyze operations in the SLP instances.  Note this may
2082          remove unsupported SLP instances which makes the above
2083          SLP kind detection invalid.  */
2084       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2085       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2086                                    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2087       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2088         goto again;
2089     }
2090
2091   /* Scan all the remaining operations in the loop that are not subject
2092      to SLP and make sure they are vectorizable.  */
2093   ok = vect_analyze_loop_operations (loop_vinfo);
2094   if (!ok)
2095     {
2096       if (dump_enabled_p ())
2097         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2098                          "bad operation or unsupported loop bound.\n");
2099       return false;
2100     }
2101
2102   /* If epilog loop is required because of data accesses with gaps,
2103      one additional iteration needs to be peeled.  Check if there is
2104      enough iterations for vectorization.  */
2105   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2106       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2107     {
2108       int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2109       tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2110
2111       if (wi::to_widest (scalar_niters) < vf)
2112         {
2113           if (dump_enabled_p ())
2114             dump_printf_loc (MSG_NOTE, vect_location,
2115                              "loop has no enough iterations to support"
2116                              " peeling for gaps.\n");
2117           return false;
2118         }
2119     }
2120
2121   /* Analyze cost.  Decide if worth while to vectorize.  */
2122   int min_profitable_estimate, min_profitable_iters;
2123   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2124                                       &min_profitable_estimate);
2125
2126   if (min_profitable_iters < 0)
2127     {
2128       if (dump_enabled_p ())
2129         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130                          "not vectorized: vectorization not profitable.\n");
2131       if (dump_enabled_p ())
2132         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133                          "not vectorized: vector version will never be "
2134                          "profitable.\n");
2135       goto again;
2136     }
2137
2138   min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2139                            * vectorization_factor);
2140
2141   /* Use the cost model only if it is more conservative than user specified
2142      threshold.  */
2143   th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters);
2144
2145   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2146
2147   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2148       && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
2149     {
2150       if (dump_enabled_p ())
2151         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2152                          "not vectorized: vectorization not profitable.\n");
2153       if (dump_enabled_p ())
2154         dump_printf_loc (MSG_NOTE, vect_location,
2155                          "not vectorized: iteration count smaller than user "
2156                          "specified loop bound parameter or minimum profitable "
2157                          "iterations (whichever is more conservative).\n");
2158       goto again;
2159     }
2160
2161   estimated_niter
2162     = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2163   if (estimated_niter == -1)
2164     estimated_niter = max_niter;
2165   if (estimated_niter != -1
2166       && ((unsigned HOST_WIDE_INT) estimated_niter
2167           < MAX (th, (unsigned) min_profitable_estimate)))
2168     {
2169       if (dump_enabled_p ())
2170         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2171                          "not vectorized: estimated iteration count too "
2172                          "small.\n");
2173       if (dump_enabled_p ())
2174         dump_printf_loc (MSG_NOTE, vect_location,
2175                          "not vectorized: estimated iteration count smaller "
2176                          "than specified loop bound parameter or minimum "
2177                          "profitable iterations (whichever is more "
2178                          "conservative).\n");
2179       goto again;
2180     }
2181
2182   /* Decide whether we need to create an epilogue loop to handle
2183      remaining scalar iterations.  */
2184   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)
2185          / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2186         * LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2187
2188   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2189       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2190     {
2191       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2192                    - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2193           < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2194         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2195     }
2196   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2197            || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2198                < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2199                /* In case of versioning, check if the maximum number of
2200                   iterations is greater than th.  If they are identical,
2201                   the epilogue is unnecessary.  */
2202                && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2203                    || (unsigned HOST_WIDE_INT) max_niter > th)))
2204     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2205
2206   /* If an epilogue loop is required make sure we can create one.  */
2207   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2208       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2209     {
2210       if (dump_enabled_p ())
2211         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2212       if (!vect_can_advance_ivs_p (loop_vinfo)
2213           || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2214                                            single_exit (LOOP_VINFO_LOOP
2215                                                          (loop_vinfo))))
2216         {
2217           if (dump_enabled_p ())
2218             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219                              "not vectorized: can't create required "
2220                              "epilog loop\n");
2221           goto again;
2222         }
2223     }
2224
2225   /* During peeling, we need to check if number of loop iterations is
2226      enough for both peeled prolog loop and vector loop.  This check
2227      can be merged along with threshold check of loop versioning, so
2228      increase threshold for this case if necessary.  */
2229   if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
2230       && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2231           || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
2232     {
2233       unsigned niters_th;
2234
2235       /* Niters for peeled prolog loop.  */
2236       if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2237         {
2238           struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2239           tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2240
2241           niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1;
2242         }
2243       else
2244         niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2245
2246       /* Niters for at least one iteration of vectorized loop.  */
2247       niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2248       /* One additional iteration because of peeling for gap.  */
2249       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2250         niters_th++;
2251       if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
2252         LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
2253     }
2254
2255   gcc_assert (vectorization_factor
2256               == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2257
2258   /* Ok to vectorize!  */
2259   return true;
2260
2261 again:
2262   /* Try again with SLP forced off but if we didn't do any SLP there is
2263      no point in re-trying.  */
2264   if (!slp)
2265     return false;
2266
2267   /* If there are reduction chains re-trying will fail anyway.  */
2268   if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2269     return false;
2270
2271   /* Likewise if the grouped loads or stores in the SLP cannot be handled
2272      via interleaving or lane instructions.  */
2273   slp_instance instance;
2274   slp_tree node;
2275   unsigned i, j;
2276   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2277     {
2278       stmt_vec_info vinfo;
2279       vinfo = vinfo_for_stmt
2280           (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2281       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2282         continue;
2283       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2284       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2285       tree vectype = STMT_VINFO_VECTYPE (vinfo);
2286       if (! vect_store_lanes_supported (vectype, size)
2287           && ! vect_grouped_store_supported (vectype, size))
2288         return false;
2289       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2290         {
2291           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2292           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2293           bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
2294           size = STMT_VINFO_GROUP_SIZE (vinfo);
2295           vectype = STMT_VINFO_VECTYPE (vinfo);
2296           if (! vect_load_lanes_supported (vectype, size)
2297               && ! vect_grouped_load_supported (vectype, single_element_p,
2298                                                 size))
2299             return false;
2300         }
2301     }
2302
2303   if (dump_enabled_p ())
2304     dump_printf_loc (MSG_NOTE, vect_location,
2305                      "re-trying with SLP disabled\n");
2306
2307   /* Roll back state appropriately.  No SLP this time.  */
2308   slp = false;
2309   /* Restore vectorization factor as it were without SLP.  */
2310   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2311   /* Free the SLP instances.  */
2312   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2313     vect_free_slp_instance (instance);
2314   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2315   /* Reset SLP type to loop_vect on all stmts.  */
2316   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2317     {
2318       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2319       for (gimple_stmt_iterator si = gsi_start_phis (bb);
2320            !gsi_end_p (si); gsi_next (&si))
2321         {
2322           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2323           STMT_SLP_TYPE (stmt_info) = loop_vect;
2324         }
2325       for (gimple_stmt_iterator si = gsi_start_bb (bb);
2326            !gsi_end_p (si); gsi_next (&si))
2327         {
2328           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2329           STMT_SLP_TYPE (stmt_info) = loop_vect;
2330           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2331             {
2332               stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2333               STMT_SLP_TYPE (stmt_info) = loop_vect;
2334               for (gimple_stmt_iterator pi
2335                      = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2336                    !gsi_end_p (pi); gsi_next (&pi))
2337                 {
2338                   gimple *pstmt = gsi_stmt (pi);
2339                   STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2340                 }
2341             }
2342         }
2343     }
2344   /* Free optimized alias test DDRS.  */
2345   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2346   /* Reset target cost data.  */
2347   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2348   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2349     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2350   /* Reset assorted flags.  */
2351   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2352   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2353   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2354
2355   goto start_over;
2356 }
2357
2358 /* Function vect_analyze_loop.
2359
2360    Run the vectorization analyses on LOOP and return the loop_vec_info
2361    recording their results, or NULL if the enclosing loop is already
2362    vectorizable or no attempt succeeds.  A non-NULL ORIG_LOOP_VINFO is
2363    recorded as the original loop info of every candidate analyzed.  */
2364 loop_vec_info
2365 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
2366 {
2367   /* Start with an autodetected vector size.  */
2368   current_vector_size = 0;
2369   unsigned int sizes_left
2370     = targetm.vectorize.autovectorize_vector_sizes ();
2371
2372   if (dump_enabled_p ())
2373     dump_printf_loc (MSG_NOTE, vect_location,
2374                      "===== analyze_loop_nest =====\n");
2375
2376   /* Nothing to do when the enclosing loop is already marked vectorizable.  */
2377   struct loop *outer = loop_outer (loop);
2378   loop_vec_info outer_vinfo = outer ? loop_vec_info_for_loop (outer) : NULL;
2379   if (outer_vinfo && LOOP_VINFO_VECTORIZABLE_P (outer_vinfo))
2380     {
2381       if (dump_enabled_p ())
2382         dump_printf_loc (MSG_NOTE, vect_location,
2383                          "outer-loop already vectorized.\n");
2384       return NULL;
2385     }
2386
2387   for (;;)
2388     {
2389       /* Check the CFG shape of the loop (nesting, entry/exit).  */
2390       loop_vec_info candidate = vect_analyze_loop_form (loop);
2391       if (candidate == NULL)
2392         {
2393           if (dump_enabled_p ())
2394             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2395                              "bad loop form.\n");
2396           return NULL;
2397         }
2398
2399       if (orig_loop_vinfo)
2400         LOOP_VINFO_ORIG_LOOP_INFO (candidate) = orig_loop_vinfo;
2401
2402       bool fatal = false;
2403       if (vect_analyze_loop_2 (candidate, fatal))
2404         {
2405           LOOP_VINFO_VECTORIZABLE_P (candidate) = 1;
2406           return candidate;
2407         }
2408
2409       /* This attempt failed; discard it and drop the size just tried.  */
2410       destroy_loop_vec_info (candidate, true);
2411       sizes_left &= ~current_vector_size;
2412
2413       /* Give up on a fatal failure, when no sizes remain, or when
2414          current_vector_size is still zero after the failed attempt.  */
2415       bool give_up
2416         = fatal || sizes_left == 0 || current_vector_size == 0;
2417       if (give_up)
2418         return NULL;
2419
2420       /* Retry with the largest size still set in the mask.  */
2421       current_vector_size = 1 << floor_log2 (sizes_left);
2422       if (dump_enabled_p ())
2423         dump_printf_loc (MSG_NOTE, vect_location,
2424                          "***** Re-trying analysis with "
2425                          "vector size %d\n", current_vector_size);
2426     }
2427 }
2428
2429
2430 /* Function reduction_code_for_scalar_code
2431
2432    Map CODE, the tree_code of a scalar reduction operation, to the tree code
2433    that reduces a vector of partial results into a single scalar result.
2434
2435    On success store that code in *REDUC_CODE and return TRUE; ERROR_MARK is
2436    stored when CODE is a supported reduction without such a tree code.
2437    Return FALSE, leaving *REDUC_CODE untouched, if CODE currently cannot be
2438    vectorized as reduction.  */
2439
2440 static bool
2441 reduction_code_for_scalar_code (enum tree_code code,
2442                                 enum tree_code *reduc_code)
2443 {
2444   enum tree_code vec_code;
2445
2446   switch (code)
2447     {
2448     case MAX_EXPR:
2449       vec_code = REDUC_MAX_EXPR;
2450       break;
2451     case MIN_EXPR:
2452       vec_code = REDUC_MIN_EXPR;
2453       break;
2454     case PLUS_EXPR:
2455       vec_code = REDUC_PLUS_EXPR;
2456       break;
2457     case MULT_EXPR:
2458     case MINUS_EXPR:
2459     case BIT_IOR_EXPR:
2460     case BIT_XOR_EXPR:
2461     case BIT_AND_EXPR:
2462       /* Supported, but there is no dedicated reduction tree code.  */
2463       vec_code = ERROR_MARK;
2464       break;
2465     default:
2466       /* Not a reduction we can vectorize.  */
2467       return false;
2468     }
2469
2470   *reduc_code = vec_code;
2471   return true;
2472 }
2473
2474
2475 /* Dump helper for vect_is_simple_reduction below: print MSG at vect_location,
2476    then a TDF_SLIM dump of GIMPLE statement STMT, both with flags MSG_TYPE.  */
2477
2478 static void
2479 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
2480 {
2481   dump_printf_loc (msg_type, vect_location, "%s", msg);
2482   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2483 }
2484
2485
2486 /* Detect SLP reduction of the form:
2487
2488    #a1 = phi <a5, a0>
2489    a2 = operation (a1)
2490    a3 = operation (a2)
2491    a4 = operation (a3)
2492    a5 = operation (a4)
2493
2494    #a = phi <a5>
2495
2496    PHI is the reduction phi node (#a1 = phi <a5, a0> above).  FIRST_STMT is
2497    the first reduction stmt in the chain (a2 = operation (a1)); only its
2498    rhs code is read here, and every chain stmt must have that same code.
2499    Return TRUE if a chain of two or more stmts was detected; its first stmt
2500    is then pushed onto LOOP_VINFO_REDUCTION_CHAINS (LOOP_INFO).  */
2501
2502 static bool
2503 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2504                        gimple *first_stmt)
2505 {
2506   struct loop *loop = (gimple_bb (phi))->loop_father;
2507   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2508   enum tree_code code;
2509   gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2510   stmt_vec_info use_stmt_info, current_stmt_info;
2511   tree lhs;
2512   imm_use_iterator imm_iter;
2513   use_operand_p use_p;
2514   int nloop_uses, size = 0, n_out_of_loop_uses;
2515   bool found = false;
2516
2517   if (loop != vect_loop)
2518     return false;
2519
2520   lhs = PHI_RESULT (phi);
2521   code = gimple_assign_rhs_code (first_stmt);
2522   while (1)
2523     {
2524       nloop_uses = 0;
2525       n_out_of_loop_uses = 0;
2526       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2527         {
2528           gimple *use_stmt = USE_STMT (use_p);
2529           if (is_gimple_debug (use_stmt))
2530             continue;
2531
2532           /* Check if we got back to the reduction phi.  */
2533           if (use_stmt == phi)
2534             {
2535               loop_use_stmt = use_stmt;
2536               found = true;
2537               break;
2538             }
2539
2540           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2541             {
2542               loop_use_stmt = use_stmt;
2543               nloop_uses++;
2544             }
2545            else
2546              n_out_of_loop_uses++;
2547
2548            /* There can be either a single use in the loop or two uses in
2549               phi nodes.  */
2550            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2551              return false;
2552         }
2553
2554       if (found)
2555         break;
2556
2557       /* We reached a statement with no loop uses.  */
2558       if (nloop_uses == 0)
2559         return false;
2560
2561       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2562       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2563         return false;
2564
2565       if (!is_gimple_assign (loop_use_stmt)
2566           || code != gimple_assign_rhs_code (loop_use_stmt)
2567           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2568         return false;
2569
2570       /* Insert LOOP_USE_STMT into the reduction chain.  */
2571       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2572       if (current_stmt)
2573         {
2574           current_stmt_info = vinfo_for_stmt (current_stmt);
2575           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2576           GROUP_FIRST_ELEMENT (use_stmt_info)
2577             = GROUP_FIRST_ELEMENT (current_stmt_info);
2578         }
2579       else
2580         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2581
2582       lhs = gimple_assign_lhs (loop_use_stmt);
2583       current_stmt = loop_use_stmt;
2584       size++;
2585    }
2586
2587   if (!found || loop_use_stmt != phi || size < 2)
2588     return false;
2589
2590   /* Swap the operands, if needed, to make the reduction operand be the second
2591      operand.  */
2592   lhs = PHI_RESULT (phi);
2593   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2594   while (next_stmt)
2595     {
2596       if (gimple_assign_rhs2 (next_stmt) == lhs)
2597         {
2598           tree op = gimple_assign_rhs1 (next_stmt);
2599           gimple *def_stmt = NULL;
2600
2601           if (TREE_CODE (op) == SSA_NAME)
2602             def_stmt = SSA_NAME_DEF_STMT (op);
2603
2604           /* The reduction operand is already second.  Check that the other
2605              def is either defined in the loop ("vect_internal_def"), or it's
2606              an induction (defined by a loop-header phi-node).  */
2607           if (def_stmt
2608               && gimple_bb (def_stmt)
2609               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2610               && (is_gimple_assign (def_stmt)
2611                   || is_gimple_call (def_stmt)
2612                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2613                            == vect_induction_def
2614                   || (gimple_code (def_stmt) == GIMPLE_PHI
2615                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2616                                   == vect_internal_def
2617                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2618             {
2619               lhs = gimple_assign_lhs (next_stmt);
2620               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2621               continue;
2622             }
2623
2624           return false;
2625         }
2626       else
2627         {
2628           tree op = gimple_assign_rhs2 (next_stmt);
2629           gimple *def_stmt = NULL;
2630
2631           if (TREE_CODE (op) == SSA_NAME)
2632             def_stmt = SSA_NAME_DEF_STMT (op);
2633
2634           /* RHS2 is not the reduction operand.  Check that its def is either
2635              defined in the loop ("vect_internal_def"), or it's an induction
2636              (defined by a loop-header phi-node); if so swap the operands.  */
2637           if (def_stmt
2638               && gimple_bb (def_stmt)
2639               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2640               && (is_gimple_assign (def_stmt)
2641                   || is_gimple_call (def_stmt)
2642                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2643                               == vect_induction_def
2644                   || (gimple_code (def_stmt) == GIMPLE_PHI
2645                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2646                                   == vect_internal_def
2647                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2648             {
2649               if (dump_enabled_p ())
2650                 {
2651                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2652                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2653                 }
2654
2655               swap_ssa_operands (next_stmt,
2656                                  gimple_assign_rhs1_ptr (next_stmt),
2657                                  gimple_assign_rhs2_ptr (next_stmt));
2658               update_stmt (next_stmt);
2659
2660               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2661                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2662             }
2663           else
2664             return false;
2665         }
2666
2667       lhs = gimple_assign_lhs (next_stmt);
2668       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2669     }
2670
2671   /* Save the chain for further analysis in SLP detection.  */
2672   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2673   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2674   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2675
2676   return true;
2677 }
2678
2679
2680 /* Function vect_is_simple_reduction
2681
2682    (1) Detect a cross-iteration def-use cycle that represents a simple
2683    reduction computation.  We look for the following pattern:
2684
2685    loop_header:
2686      a1 = phi < a0, a2 >
2687      a3 = ...
2688      a2 = operation (a3, a1)
2689
2690    or
2691
2692    a3 = ...
2693    loop_header:
2694      a1 = phi < a0, a2 >
2695      a2 = operation (a3, a1)
2696
2697    such that:
2698    1. operation is commutative and associative and it is safe to
2699       change the order of the computation
2700    2. no uses for a2 in the loop (a2 is used out of the loop)
2701    3. no uses of a1 in the loop besides the reduction operation
2702    4. no uses of a1 outside the loop.
2703
2704    Conditions 1,4 are tested here, 2,3 in vect_mark_stmts_to_be_vectorized.
2705
2706    (2) Detect a cross-iteration def-use cycle in nested loops (nested cycles).
2707
2708    (3) Detect phi-node cycles in outer-loop vectorization (double reductions):
2709
2710      a1 = phi < a0, a2 >
2711      inner loop (def of a3)
2712      a2 = phi < a3 >
2713
2714    (4) Detect condition expressions, ie:
2715      for (int i = 0; i < N; i++)
2716        if (a[i] < val)
2717          ret_val = a[i];
2718
2719    Return the reduction stmt, or NULL if no reduction is recognized.  Set
2720    *DOUBLE_REDUC for (3) and *V_REDUC_TYPE to COND_REDUCTION for a COND_EXPR
2721    not nested in the vectorized loop.  If NEED_WRAPPING_INTEGRAL_OVERFLOW, the
2722    reordering check also rejects overflowable ops on non-wrapping int types.  */
2723
2724 static gimple *
2725 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2726                           bool *double_reduc,
2727                           bool need_wrapping_integral_overflow,
2728                           enum vect_reduction_type *v_reduc_type)
2729 {
2730   struct loop *loop = (gimple_bb (phi))->loop_father;
2731   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2732   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
2733   enum tree_code orig_code, code;
2734   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2735   tree type;
2736   int nloop_uses;
2737   tree name;
2738   imm_use_iterator imm_iter;
2739   use_operand_p use_p;
2740   bool phi_def;
2741
2742   *double_reduc = false;
2743   *v_reduc_type = TREE_CODE_REDUCTION;
2744
2745   name = PHI_RESULT (phi);
2746   /* ???  If there are no uses of the PHI result the inner loop reduction
2747      won't be detected as possibly double-reduction by vectorizable_reduction
2748      because that tries to walk the PHI arg from the preheader edge which
2749      can be constant.  See PR60382.  */
2750   if (has_zero_uses (name))
2751     return NULL;
2752   nloop_uses = 0;
2753   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2754     {
2755       gimple *use_stmt = USE_STMT (use_p);
2756       if (is_gimple_debug (use_stmt))
2757         continue;
2758
2759       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2760         {
2761           if (dump_enabled_p ())
2762             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2763                              "intermediate value used outside loop.\n");
2764
2765           return NULL;
2766         }
2767
2768       nloop_uses++;
2769       if (nloop_uses > 1)
2770         {
2771           if (dump_enabled_p ())
2772             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2773                              "reduction value used in loop.\n");
2774           return NULL;
2775         }
2776
2777       phi_use_stmt = use_stmt;
2778     }
2779
2780   edge latch_e = loop_latch_edge (loop);
2781   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2782   if (TREE_CODE (loop_arg) != SSA_NAME)
2783     {
2784       if (dump_enabled_p ())
2785         {
2786           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2787                            "reduction: not ssa_name: ");
2788           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2789           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2790         }
2791       return NULL;
2792     }
2793
2794   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2795   if (is_gimple_assign (def_stmt))
2796     {
2797       name = gimple_assign_lhs (def_stmt);
2798       phi_def = false;
2799     }
2800   else if (gimple_code (def_stmt) == GIMPLE_PHI)
2801     {
2802       name = PHI_RESULT (def_stmt);
2803       phi_def = true;
2804     }
2805   else
2806     {
2807       if (dump_enabled_p ())
2808         {
2809           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2810                            "reduction: unhandled reduction operation: ");
2811           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
2812         }
2813       return NULL;
2814     }
2815
2816   nloop_uses = 0;
2817   auto_vec<gphi *, 3> lcphis;
2818   if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
2819     FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2820       {
2821         gimple *use_stmt = USE_STMT (use_p);
2822         if (is_gimple_debug (use_stmt))
2823           continue;
2824         if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2825           nloop_uses++;
2826         else
2827           /* We can have more than one loop-closed PHI.  */
2828           lcphis.safe_push (as_a <gphi *> (use_stmt));
2829         if (nloop_uses > 1)
2830           {
2831             if (dump_enabled_p ())
2832               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2833                                "reduction used in loop.\n");
2834             return NULL;
2835           }
2836       }
2837
2838   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2839      defined in the inner loop.  */
2840   if (phi_def)
2841     {
2842       op1 = PHI_ARG_DEF (def_stmt, 0);
2843
2844       if (gimple_phi_num_args (def_stmt) != 1
2845           || TREE_CODE (op1) != SSA_NAME)
2846         {
2847           if (dump_enabled_p ())
2848             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2849                              "unsupported phi node definition.\n");
2850
2851           return NULL;
2852         }
2853
2854       def1 = SSA_NAME_DEF_STMT (op1);
2855       if (gimple_bb (def1)
2856           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2857           && loop->inner
2858           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2859           && is_gimple_assign (def1)
2860           && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2861         {
2862           if (dump_enabled_p ())
2863             report_vect_op (MSG_NOTE, def_stmt,
2864                             "detected double reduction: ");
2865
2866           *double_reduc = true;
2867           return def_stmt;
2868         }
2869
2870       return NULL;
2871     }
2872
2873   /* An inner-loop reduction executes in its original order, so the
2874      reordering checks can be skipped, unless some loop-closed PHI result
2875      is used outside the vectorized loop (double reduction).  */
2876   bool check_reduction = true;
2877   if (flow_loop_nested_p (vect_loop, loop))
2878     {
2879       gphi *lcphi;
2880       unsigned i;
2881       check_reduction = false;
2882       FOR_EACH_VEC_ELT (lcphis, i, lcphi)
2883         FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
2884           {
2885             gimple *use_stmt = USE_STMT (use_p);
2886             if (is_gimple_debug (use_stmt))
2887               continue;
2888             if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
2889               check_reduction = true;
2890           }
2891     }
2892
2893   bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
2894   code = orig_code = gimple_assign_rhs_code (def_stmt);
2895
2896   /* We can handle "res -= x[i]", which is non-associative by
2897      simply rewriting this into "res += -x[i]".  Avoid changing
2898      gimple instruction for the first simple tests and only do this
2899      if we're allowed to change code at all.  */
2900   if (code == MINUS_EXPR
2901       && (op1 = gimple_assign_rhs1 (def_stmt))
2902       && TREE_CODE (op1) == SSA_NAME
2903       && SSA_NAME_DEF_STMT (op1) == phi)
2904     code = PLUS_EXPR;
2905
2906   if (code == COND_EXPR)
2907     {
2908       if (! nested_in_vect_loop)
2909         *v_reduc_type = COND_REDUCTION;
2910
2911       op3 = gimple_assign_rhs1 (def_stmt);
2912       if (COMPARISON_CLASS_P (op3))
2913         {
2914           op4 = TREE_OPERAND (op3, 1);
2915           op3 = TREE_OPERAND (op3, 0);
2916         }
2917
2918       op1 = gimple_assign_rhs2 (def_stmt);
2919       op2 = gimple_assign_rhs3 (def_stmt);
2920     }
2921   else if (!commutative_tree_code (code) || !associative_tree_code (code))
2922     {
2923       if (dump_enabled_p ())
2924         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2925                         "reduction: not commutative/associative: ");
2926       return NULL;
2927     }
2928   else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
2929     {
2930       op1 = gimple_assign_rhs1 (def_stmt);
2931       op2 = gimple_assign_rhs2 (def_stmt);
2932     }
2933   else
2934     {
2935       if (dump_enabled_p ())
2936         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2937                         "reduction: not handled operation: ");
2938       return NULL;
2939     }
2940
2941   if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2942     {
2943       if (dump_enabled_p ())
2944         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2945                         "reduction: both uses not ssa_names: ");
2946
2947       return NULL;
2948     }
2949
2950   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2951   if ((TREE_CODE (op1) == SSA_NAME
2952        && !types_compatible_p (type,TREE_TYPE (op1)))
2953       || (TREE_CODE (op2) == SSA_NAME
2954           && !types_compatible_p (type, TREE_TYPE (op2)))
2955       || (op3 && TREE_CODE (op3) == SSA_NAME
2956           && !types_compatible_p (type, TREE_TYPE (op3)))
2957       || (op4 && TREE_CODE (op4) == SSA_NAME
2958           && !types_compatible_p (type, TREE_TYPE (op4))))
2959     {
2960       if (dump_enabled_p ())
2961         {
2962           dump_printf_loc (MSG_NOTE, vect_location,
2963                            "reduction: multiple types: operation type: ");
2964           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2965           dump_printf (MSG_NOTE, ", operands types: ");
2966           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2967                              TREE_TYPE (op1));
2968           dump_printf (MSG_NOTE, ",");
2969           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2970                              TREE_TYPE (op2));
2971           if (op3)
2972             {
2973               dump_printf (MSG_NOTE, ",");
2974               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2975                                  TREE_TYPE (op3));
2976             }
2977
2978           if (op4)
2979             {
2980               dump_printf (MSG_NOTE, ",");
2981               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2982                                  TREE_TYPE (op4));
2983             }
2984           dump_printf (MSG_NOTE, "\n");
2985         }
2986
2987       return NULL;
2988     }
2989
2990   /* Check that it's ok to change the order of the computation.
2991      Generally, when vectorizing a reduction we change the order of the
2992      computation.  This may change the behavior of the program in some
2993      cases, so we need to check that this is ok.  One exception is when
2994      vectorizing an outer-loop: the inner-loop is executed sequentially,
2995      and therefore vectorizing reductions in the inner-loop during
2996      outer-loop vectorization is safe.  */
2997
2998   if (*v_reduc_type != COND_REDUCTION
2999       && check_reduction)
3000     {
3001       /* CHECKME: check for !flag_finite_math_only too?  */
3002       if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math)
3003         {
3004           /* Changing the order of operations changes the semantics.  */
3005           if (dump_enabled_p ())
3006             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3007                         "reduction: unsafe fp math optimization: ");
3008           return NULL;
3009         }
3010       else if (INTEGRAL_TYPE_P (type))
3011         {
3012           if (!operation_no_trapping_overflow (type, code))
3013             {
3014               /* Changing the order of operations changes the semantics.  */
3015               if (dump_enabled_p ())
3016                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3017                                 "reduction: unsafe int math optimization"
3018                                 " (overflow traps): ");
3019               return NULL;
3020             }
3021           if (need_wrapping_integral_overflow
3022               && !TYPE_OVERFLOW_WRAPS (type)
3023               && operation_can_overflow (code))
3024             {
3025               /* Changing the order of operations changes the semantics.  */
3026               if (dump_enabled_p ())
3027                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3028                                 "reduction: unsafe int math optimization"
3029                                 " (overflow doesn't wrap): ");
3030               return NULL;
3031             }
3032         }
3033       else if (SAT_FIXED_POINT_TYPE_P (type))
3034         {
3035           /* Changing the order of operations changes the semantics.  */
3036           if (dump_enabled_p ())
3037           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3038                           "reduction: unsafe fixed-point math optimization: ");
3039           return NULL;
3040         }
3041     }
3042
3043   /* Reduction is safe. We're dealing with one of the following:
3044      1) integer arithmetic and no trapv
3045      2) floating point arithmetic, and special flags permit this optimization
3046      3) nested cycle (i.e., outer loop vectorization).  */
3047   if (TREE_CODE (op1) == SSA_NAME)
3048     def1 = SSA_NAME_DEF_STMT (op1);
3049
3050   if (TREE_CODE (op2) == SSA_NAME)
3051     def2 = SSA_NAME_DEF_STMT (op2);
3052
3053   if (code != COND_EXPR
3054       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
3055     {
3056       if (dump_enabled_p ())
3057         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
3058       return NULL;
3059     }
3060
3061   /* Check that one def is the reduction def, defined by PHI,
3062      the other def is either defined in the loop ("vect_internal_def"),
3063      or it's an induction (defined by a loop-header phi-node).  */
3064
3065   if (def2 && def2 == phi
3066       && (code == COND_EXPR
3067           || !def1 || gimple_nop_p (def1)
3068           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
3069           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
3070               && (is_gimple_assign (def1)
3071                   || is_gimple_call (def1)
3072                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3073                       == vect_induction_def
3074                   || (gimple_code (def1) == GIMPLE_PHI
3075                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3076                           == vect_internal_def
3077                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
3078     {
3079       if (dump_enabled_p ())
3080         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3081       return def_stmt;
3082     }
3083
3084   if (def1 && def1 == phi
3085       && (code == COND_EXPR
3086           || !def2 || gimple_nop_p (def2)
3087           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
3088           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
3089               && (is_gimple_assign (def2)
3090                   || is_gimple_call (def2)
3091                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3092                        == vect_induction_def
3093                   || (gimple_code (def2) == GIMPLE_PHI
3094                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3095                            == vect_internal_def
3096                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
3097     {
3098       if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
3099         {
3100           /* Check if we can swap operands (just for simplicity - so that
3101              the rest of the code can assume that the reduction variable
3102              is always the last (second) argument).  */
3103           if (code == COND_EXPR)
3104             {
3105               /* Swap cond_expr by inverting the condition.  */
3106               tree cond_expr = gimple_assign_rhs1 (def_stmt);
3107               enum tree_code invert_code = ERROR_MARK;
3108               enum tree_code cond_code = TREE_CODE (cond_expr);
3109
3110               if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
3111                 {
3112                   bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
3113                   invert_code = invert_tree_comparison (cond_code, honor_nans);
3114                 }
3115               if (invert_code != ERROR_MARK)
3116                 {
3117                   TREE_SET_CODE (cond_expr, invert_code);
3118                   swap_ssa_operands (def_stmt,
3119                                      gimple_assign_rhs2_ptr (def_stmt),
3120                                      gimple_assign_rhs3_ptr (def_stmt));
3121                 }
3122               else
3123                 {
3124                   if (dump_enabled_p ())
3125                     report_vect_op (MSG_NOTE, def_stmt,
3126                                     "detected reduction: cannot swap operands "
3127                                     "for cond_expr");
3128                   return NULL;
3129                 }
3130             }
3131           else
3132             swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
3133                                gimple_assign_rhs2_ptr (def_stmt));
3134
3135           if (dump_enabled_p ())
3136             report_vect_op (MSG_NOTE, def_stmt,
3137                             "detected reduction: need to swap operands: ");
3138
3139           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
3140             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
3141         }
3142       else
3143         {
3144           if (dump_enabled_p ())
3145             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3146         }
3147
3148       return def_stmt;
3149     }
3150
3151   /* Try to find SLP reduction chain.  */
3152   if (! nested_in_vect_loop
3153       && code != COND_EXPR
3154       && vect_is_slp_reduction (loop_info, phi, def_stmt))
3155     {
3156       if (dump_enabled_p ())
3157         report_vect_op (MSG_NOTE, def_stmt,
3158                         "reduction: detected reduction chain: ");
3159
3160       return def_stmt;
3161     }
3162
3163   if (dump_enabled_p ())
3164     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3165                     "reduction: unknown pattern: ");
3166
3167   return NULL;
3168 }
3169
3170 /* Wrapper around vect_is_simple_reduction, which will modify code
3171    in-place if it enables detection of more reductions.  Arguments
3172    as there.  */
3173
3174 gimple *
3175 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3176                              bool *double_reduc,
3177                              bool need_wrapping_integral_overflow)
3178 {
3179   enum vect_reduction_type v_reduc_type;
3180   gimple *def = vect_is_simple_reduction (loop_info, phi, double_reduc,
3181                                           need_wrapping_integral_overflow,
3182                                           &v_reduc_type);
3183   if (def)
3184     {
3185       stmt_vec_info reduc_def_info = vinfo_for_stmt (phi);
3186       STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type;
3187       STMT_VINFO_REDUC_DEF (reduc_def_info) = def;
3188       reduc_def_info = vinfo_for_stmt (def);
3189       STMT_VINFO_REDUC_DEF (reduc_def_info) = phi;
3190     }
3191   return def;
3192 }
3193
3194 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
3195 int
3196 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3197                              int *peel_iters_epilogue,
3198                              stmt_vector_for_cost *scalar_cost_vec,
3199                              stmt_vector_for_cost *prologue_cost_vec,
3200                              stmt_vector_for_cost *epilogue_cost_vec)
3201 {
3202   int retval = 0;
3203   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3204
3205   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3206     {
3207       *peel_iters_epilogue = vf/2;
3208       if (dump_enabled_p ())
3209         dump_printf_loc (MSG_NOTE, vect_location,
3210                          "cost model: epilogue peel iters set to vf/2 "
3211                          "because loop iterations are unknown .\n");
3212
3213       /* If peeled iterations are known but number of scalar loop
3214          iterations are unknown, count a taken branch per peeled loop.  */
3215       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3216                                  NULL, 0, vect_prologue);
3217       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3218                                  NULL, 0, vect_epilogue);
3219     }
3220   else
3221     {
3222       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3223       peel_iters_prologue = niters < peel_iters_prologue ?
3224                             niters : peel_iters_prologue;
3225       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
3226       /* If we need to peel for gaps, but no peeling is required, we have to
3227          peel VF iterations.  */
3228       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3229         *peel_iters_epilogue = vf;
3230     }
3231
3232   stmt_info_for_cost *si;
3233   int j;
3234   if (peel_iters_prologue)
3235     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3236         {
3237           stmt_vec_info stmt_info
3238             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3239           retval += record_stmt_cost (prologue_cost_vec,
3240                                       si->count * peel_iters_prologue,
3241                                       si->kind, stmt_info, si->misalign,
3242                                       vect_prologue);
3243         }
3244   if (*peel_iters_epilogue)
3245     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3246         {
3247           stmt_vec_info stmt_info
3248             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3249           retval += record_stmt_cost (epilogue_cost_vec,
3250                                       si->count * *peel_iters_epilogue,
3251                                       si->kind, stmt_info, si->misalign,
3252                                       vect_epilogue);
3253         }
3254
3255   return retval;
3256 }
3257
3258 /* Function vect_estimate_min_profitable_iters
3259
3260    Return the number of iterations required for the vector version of the
3261    loop to be profitable relative to the cost of the scalar version of the
3262    loop.
3263
3264    *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3265    of iterations for vectorization.  -1 value means loop vectorization
3266    is not profitable.  This returned value may be used for dynamic
3267    profitability check.
3268
3269    *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3270    for static check against estimated number of iterations.  */
3271
3272 static void
3273 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3274                                     int *ret_min_profitable_niters,
3275                                     int *ret_min_profitable_estimate)
3276 {
3277   int min_profitable_iters;
3278   int min_profitable_estimate;
3279   int peel_iters_prologue;
3280   int peel_iters_epilogue;
3281   unsigned vec_inside_cost = 0;
3282   int vec_outside_cost = 0;
3283   unsigned vec_prologue_cost = 0;
3284   unsigned vec_epilogue_cost = 0;
3285   int scalar_single_iter_cost = 0;
3286   int scalar_outside_cost = 0;
3287   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3288   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3289   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3290
3291   /* Cost model disabled.  */
3292   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3293     {
3294       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3295       *ret_min_profitable_niters = 0;
3296       *ret_min_profitable_estimate = 0;
3297       return;
3298     }
3299
3300   /* Requires loop versioning tests to handle misalignment.  */
3301   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3302     {
3303       /*  FIXME: Make cost depend on complexity of individual check.  */
3304       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3305       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3306                             vect_prologue);
3307       dump_printf (MSG_NOTE,
3308                    "cost model: Adding cost of checks for loop "
3309                    "versioning to treat misalignment.\n");
3310     }
3311
3312   /* Requires loop versioning with alias checks.  */
3313   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3314     {
3315       /*  FIXME: Make cost depend on complexity of individual check.  */
3316       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3317       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3318                             vect_prologue);
3319       dump_printf (MSG_NOTE,
3320                    "cost model: Adding cost of checks for loop "
3321                    "versioning aliasing.\n");
3322     }
3323
3324   /* Requires loop versioning with niter checks.  */
3325   if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3326     {
3327       /*  FIXME: Make cost depend on complexity of individual check.  */
3328       (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
3329                             vect_prologue);
3330       dump_printf (MSG_NOTE,
3331                    "cost model: Adding cost of checks for loop "
3332                    "versioning niters.\n");
3333     }
3334
3335   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3336     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3337                           vect_prologue);
3338
3339   /* Count statements in scalar loop.  Using this as scalar cost for a single
3340      iteration for now.
3341
3342      TODO: Add outer loop support.
3343
3344      TODO: Consider assigning different costs to different scalar
3345      statements.  */
3346
3347   scalar_single_iter_cost
3348     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3349
3350   /* Add additional cost for the peeled instructions in prologue and epilogue
3351      loop.
3352
3353      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3354      at compile-time - we assume it's vf/2 (the worst would be vf-1).
3355
3356      TODO: Build an expression that represents peel_iters for prologue and
3357      epilogue to be used in a run-time test.  */
3358
3359   if (npeel  < 0)
3360     {
3361       peel_iters_prologue = vf/2;
3362       dump_printf (MSG_NOTE, "cost model: "
3363                    "prologue peel iters set to vf/2.\n");
3364
3365       /* If peeling for alignment is unknown, loop bound of main loop becomes
3366          unknown.  */
3367       peel_iters_epilogue = vf/2;
3368       dump_printf (MSG_NOTE, "cost model: "
3369                    "epilogue peel iters set to vf/2 because "
3370                    "peeling for alignment is unknown.\n");
3371
3372       /* If peeled iterations are unknown, count a taken branch and a not taken
3373          branch per peeled loop. Even if scalar loop iterations are known,
3374          vector iterations are not known since peeled prologue iterations are
3375          not known. Hence guards remain the same.  */
3376       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3377                             NULL, 0, vect_prologue);
3378       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3379                             NULL, 0, vect_prologue);
3380       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3381                             NULL, 0, vect_epilogue);
3382       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3383                             NULL, 0, vect_epilogue);
3384       stmt_info_for_cost *si;
3385       int j;
3386       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3387         {
3388           struct _stmt_vec_info *stmt_info
3389             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3390           (void) add_stmt_cost (target_cost_data,
3391                                 si->count * peel_iters_prologue,
3392                                 si->kind, stmt_info, si->misalign,
3393                                 vect_prologue);
3394           (void) add_stmt_cost (target_cost_data,
3395                                 si->count * peel_iters_epilogue,
3396                                 si->kind, stmt_info, si->misalign,
3397                                 vect_epilogue);
3398         }
3399     }
3400   else
3401     {
3402       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3403       stmt_info_for_cost *si;
3404       int j;
3405       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3406
3407       prologue_cost_vec.create (2);
3408       epilogue_cost_vec.create (2);
3409       peel_iters_prologue = npeel;
3410
3411       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3412                                           &peel_iters_epilogue,
3413                                           &LOOP_VINFO_SCALAR_ITERATION_COST
3414                                             (loop_vinfo),
3415                                           &prologue_cost_vec,
3416                                           &epilogue_cost_vec);
3417
3418       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3419         {
3420           struct _stmt_vec_info *stmt_info
3421             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3422           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3423                                 si->misalign, vect_prologue);
3424         }
3425
3426       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3427         {
3428           struct _stmt_vec_info *stmt_info
3429             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3430           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3431                                 si->misalign, vect_epilogue);
3432         }
3433
3434       prologue_cost_vec.release ();
3435       epilogue_cost_vec.release ();
3436     }
3437
3438   /* FORNOW: The scalar outside cost is incremented in one of the
3439      following ways:
3440
3441      1. The vectorizer checks for alignment and aliasing and generates
3442      a condition that allows dynamic vectorization.  A cost model
3443      check is ANDED with the versioning condition.  Hence scalar code
3444      path now has the added cost of the versioning check.
3445
3446        if (cost > th & versioning_check)
3447          jmp to vector code
3448
3449      Hence run-time scalar is incremented by not-taken branch cost.
3450
3451      2. The vectorizer then checks if a prologue is required.  If the
3452      cost model check was not done before during versioning, it has to
3453      be done before the prologue check.
3454
3455        if (cost <= th)
3456          prologue = scalar_iters
3457        if (prologue == 0)
3458          jmp to vector code
3459        else
3460          execute prologue
3461        if (prologue == num_iters)
3462          go to exit
3463
3464      Hence the run-time scalar cost is incremented by a taken branch,
3465      plus a not-taken branch, plus a taken branch cost.
3466
3467      3. The vectorizer then checks if an epilogue is required.  If the
3468      cost model check was not done before during prologue check, it
3469      has to be done with the epilogue check.
3470
3471        if (prologue == 0)
3472          jmp to vector code
3473        else
3474          execute prologue
3475        if (prologue == num_iters)
3476          go to exit
3477        vector code:
3478          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3479            jmp to epilogue
3480
3481      Hence the run-time scalar cost should be incremented by 2 taken
3482      branches.
3483
3484      TODO: The back end may reorder the BBS's differently and reverse
3485      conditions/branch directions.  Change the estimates below to
3486      something more reasonable.  */
3487
3488   /* If the number of iterations is known and we do not do versioning, we can
3489      decide whether to vectorize at compile time.  Hence the scalar version
3490      do not carry cost model guard costs.  */
3491   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3492       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
3493     {
3494       /* Cost model check occurs at versioning.  */
3495       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3496         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3497       else
3498         {
3499           /* Cost model check occurs at prologue generation.  */
3500           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3501             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3502               + vect_get_stmt_cost (cond_branch_not_taken);
3503           /* Cost model check occurs at epilogue generation.  */
3504           else
3505             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3506         }
3507     }
3508
3509   /* Complete the target-specific cost calculations.  */
3510   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3511                &vec_inside_cost, &vec_epilogue_cost);
3512
3513   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3514
3515   if (dump_enabled_p ())
3516     {
3517       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3518       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3519                    vec_inside_cost);
3520       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3521                    vec_prologue_cost);
3522       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3523                    vec_epilogue_cost);
3524       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3525                    scalar_single_iter_cost);
3526       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3527                    scalar_outside_cost);
3528       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3529                    vec_outside_cost);
3530       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3531                    peel_iters_prologue);
3532       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3533                    peel_iters_epilogue);
3534     }
3535
3536   /* Calculate number of iterations required to make the vector version
3537      profitable, relative to the loop bodies only.  The following condition
3538      must hold true:
3539      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3540      where
3541      SIC = scalar iteration cost, VIC = vector iteration cost,
3542      VOC = vector outside cost, VF = vectorization factor,
3543      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3544      SOC = scalar outside cost for run time cost model check.  */
3545
3546   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3547     {
3548       if (vec_outside_cost <= 0)
3549         min_profitable_iters = 0;
3550       else
3551         {
3552           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3553                                   - vec_inside_cost * peel_iters_prologue
3554                                   - vec_inside_cost * peel_iters_epilogue)
3555                                  / ((scalar_single_iter_cost * vf)
3556                                     - vec_inside_cost);
3557
3558           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3559               <= (((int) vec_inside_cost * min_profitable_iters)
3560                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3561             min_profitable_iters++;
3562         }
3563     }
3564   /* vector version will never be profitable.  */
3565   else
3566     {
3567       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3568         warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3569                     "did not happen for a simd loop");
3570
3571       if (dump_enabled_p ())
3572         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3573                          "cost model: the vector iteration cost = %d "
3574                          "divided by the scalar iteration cost = %d "
3575                          "is greater or equal to the vectorization factor = %d"
3576                          ".\n",
3577                          vec_inside_cost, scalar_single_iter_cost, vf);
3578       *ret_min_profitable_niters = -1;
3579       *ret_min_profitable_estimate = -1;
3580       return;
3581     }
3582
3583   dump_printf (MSG_NOTE,
3584                "  Calculated minimum iters for profitability: %d\n",
3585                min_profitable_iters);
3586
3587   min_profitable_iters =
3588         min_profitable_iters < vf ? vf : min_profitable_iters;
3589
3590   if (dump_enabled_p ())
3591     dump_printf_loc (MSG_NOTE, vect_location,
3592                      "  Runtime profitability threshold = %d\n",
3593                      min_profitable_iters);
3594
3595   *ret_min_profitable_niters = min_profitable_iters;
3596
3597   /* Calculate number of iterations required to make the vector version
3598      profitable, relative to the loop bodies only.
3599
3600      Non-vectorized variant is SIC * niters and it must win over vector
3601      variant on the expected loop trip count.  The following condition must hold true:
3602      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3603
3604   if (vec_outside_cost <= 0)
3605     min_profitable_estimate = 0;
3606   else
3607     {
3608       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3609                                  - vec_inside_cost * peel_iters_prologue
3610                                  - vec_inside_cost * peel_iters_epilogue)
3611                                  / ((scalar_single_iter_cost * vf)
3612                                    - vec_inside_cost);
3613     }
3614   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3615   if (dump_enabled_p ())
3616     dump_printf_loc (MSG_NOTE, vect_location,
3617                      "  Static estimate profitability threshold = %d\n",
3618                      min_profitable_estimate);
3619
3620   *ret_min_profitable_estimate = min_profitable_estimate;
3621 }
3622
3623 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3624    vector elements (not bits) for a vector of mode MODE.  */
3625 static void
3626 calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset,
3627                               unsigned char *sel)
3628 {
3629   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3630
3631   for (i = 0; i < nelt; i++)
3632     sel[i] = (i + offset) & (2*nelt - 1);
3633 }
3634
3635 /* Checks whether the target supports whole-vector shifts for vectors of mode
3636    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3637    it supports vec_perm_const with masks for all necessary shift amounts.  */
3638 static bool
3639 have_whole_vector_shift (machine_mode mode)
3640 {
3641   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3642     return true;
3643
3644   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3645     return false;
3646
3647   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3648   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3649
3650   for (i = nelt/2; i >= 1; i/=2)
3651     {
3652       calc_vec_perm_mask_for_shift (mode, i, sel);
3653       if (!can_vec_perm_p (mode, false, sel))
3654         return false;
3655     }
3656   return true;
3657 }
3658
3659 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3660
3661 static tree
3662 get_reduction_op (gimple *stmt, int reduc_index)
3663 {
3664   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3665     {
3666     case GIMPLE_SINGLE_RHS:
3667       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3668                   == ternary_op);
3669       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3670     case GIMPLE_UNARY_RHS:
3671       return gimple_assign_rhs1 (stmt);
3672     case GIMPLE_BINARY_RHS:
3673       return (reduc_index
3674               ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3675     case GIMPLE_TERNARY_RHS:
3676       return gimple_op (stmt, reduc_index + 1);
3677     default:
3678       gcc_unreachable ();
3679     }
3680 }
3681
3682 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3683    functions. Design better to avoid maintenance issues.  */
3684
3685 /* Function vect_model_reduction_cost.
3686
3687    Models cost for a reduction operation, including the vector ops
3688    generated within the strip-mine loop, the initial definition before
3689    the loop, and the epilogue code that must be generated.  */
3690
3691 static void
3692 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3693                            int ncopies)
3694 {
3695   int prologue_cost = 0, epilogue_cost = 0;
3696   enum tree_code code;
3697   optab optab;
3698   tree vectype;
3699   gimple *orig_stmt;
3700   machine_mode mode;
3701   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3702   struct loop *loop = NULL;
3703   void *target_cost_data;
3704
3705   if (loop_vinfo)
3706     {
3707       loop = LOOP_VINFO_LOOP (loop_vinfo);
3708       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3709     }
3710   else
3711     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3712
3713   /* Condition reductions generate two reductions in the loop.  */
3714   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3715     ncopies *= 2;
3716
3717   /* Cost of reduction op inside loop.  */
3718   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3719                                         stmt_info, 0, vect_body);
3720
3721   vectype = STMT_VINFO_VECTYPE (stmt_info);
3722   mode = TYPE_MODE (vectype);
3723   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3724
3725   if (!orig_stmt)
3726     orig_stmt = STMT_VINFO_STMT (stmt_info);
3727
3728   code = gimple_assign_rhs_code (orig_stmt);
3729
3730   /* Add in cost for initial definition.
3731      For cond reduction we have four vectors: initial index, step, initial
3732      result of the data reduction, initial value of the index reduction.  */
3733   int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3734                        == COND_REDUCTION ? 4 : 1;
3735   prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3736                                   scalar_to_vec, stmt_info, 0,
3737                                   vect_prologue);
3738
3739   /* Determine cost of epilogue code.
3740
3741      We have a reduction operator that will reduce the vector in one statement.
3742      Also requires scalar extract.  */
3743
3744   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3745     {
3746       if (reduc_code != ERROR_MARK)
3747         {
3748           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3749             {
3750               /* An EQ stmt and an COND_EXPR stmt.  */
3751               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3752                                               vector_stmt, stmt_info, 0,
3753                                               vect_epilogue);
3754               /* Reduction of the max index and a reduction of the found
3755                  values.  */
3756               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3757                                               vec_to_scalar, stmt_info, 0,
3758                                               vect_epilogue);
3759               /* A broadcast of the max value.  */
3760               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3761                                               scalar_to_vec, stmt_info, 0,
3762                                               vect_epilogue);
3763             }
3764           else
3765             {
3766               epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3767                                               stmt_info, 0, vect_epilogue);
3768               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3769                                               vec_to_scalar, stmt_info, 0,
3770                                               vect_epilogue);
3771             }
3772         }
3773       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3774         {
3775           unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
3776           /* Extraction of scalar elements.  */
3777           epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
3778                                           vec_to_scalar, stmt_info, 0,
3779                                           vect_epilogue);
3780           /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
3781           epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
3782                                           scalar_stmt, stmt_info, 0,
3783                                           vect_epilogue);
3784         }
3785       else
3786         {
3787           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3788           tree bitsize =
3789             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3790           int element_bitsize = tree_to_uhwi (bitsize);
3791           int nelements = vec_size_in_bits / element_bitsize;
3792
3793           if (code == COND_EXPR)
3794             code = MAX_EXPR;
3795
3796           optab = optab_for_tree_code (code, vectype, optab_default);
3797
3798           /* We have a whole vector shift available.  */
3799           if (optab != unknown_optab
3800               && VECTOR_MODE_P (mode)
3801               && optab_handler (optab, mode) != CODE_FOR_nothing
3802               && have_whole_vector_shift (mode))
3803             {
3804               /* Final reduction via vector shifts and the reduction operator.
3805                  Also requires scalar extract.  */
3806               epilogue_cost += add_stmt_cost (target_cost_data,
3807                                               exact_log2 (nelements) * 2,
3808                                               vector_stmt, stmt_info, 0,
3809                                               vect_epilogue);
3810               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3811                                               vec_to_scalar, stmt_info, 0,
3812                                               vect_epilogue);
3813             }
3814           else
3815             /* Use extracts and reduction op for final reduction.  For N
3816                elements, we have N extracts and N-1 reduction ops.  */
3817             epilogue_cost += add_stmt_cost (target_cost_data,
3818                                             nelements + nelements - 1,
3819                                             vector_stmt, stmt_info, 0,
3820                                             vect_epilogue);
3821         }
3822     }
3823
3824   if (dump_enabled_p ())
3825     dump_printf (MSG_NOTE,
3826                  "vect_model_reduction_cost: inside_cost = %d, "
3827                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3828                  prologue_cost, epilogue_cost);
3829 }
3830
3831
3832 /* Function vect_model_induction_cost.
3833
3834    Models cost for induction operations.  */
3835
3836 static void
3837 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3838 {
3839   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3840   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3841   unsigned inside_cost, prologue_cost;
3842
3843   if (PURE_SLP_STMT (stmt_info))
3844     return;
3845
3846   /* loop cost for vec_loop.  */
3847   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3848                                stmt_info, 0, vect_body);
3849
3850   /* prologue cost for vec_init and vec_step.  */
3851   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3852                                  stmt_info, 0, vect_prologue);
3853
3854   if (dump_enabled_p ())
3855     dump_printf_loc (MSG_NOTE, vect_location,
3856                      "vect_model_induction_cost: inside_cost = %d, "
3857                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3858 }
3859
3860
3861
3862 /* Function get_initial_def_for_reduction
3863
3864    Input:
3865    STMT - a stmt that performs a reduction operation in the loop.
3866    INIT_VAL - the initial value of the reduction variable
3867
3868    Output:
3869    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3870         of the reduction (used for adjusting the epilog - see below).
3871    Return a vector variable, initialized according to the operation that STMT
3872         performs. This vector will be used as the initial value of the
3873         vector of partial results.
3874
3875    Option1 (adjust in epilog): Initialize the vector as follows:
3876      add/bit or/xor:    [0,0,...,0,0]
3877      mult/bit and:      [1,1,...,1,1]
3878      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3879    and when necessary (e.g. add/mult case) let the caller know
3880    that it needs to adjust the result by init_val.
3881
3882    Option2: Initialize the vector as follows:
3883      add/bit or/xor:    [init_val,0,0,...,0]
3884      mult/bit and:      [init_val,1,1,...,1]
3885      min/max/cond_expr: [init_val,init_val,...,init_val]
3886    and no adjustments are needed.
3887
3888    For example, for the following code:
3889
3890    s = init_val;
3891    for (i=0;i<n;i++)
3892      s = s + a[i];
3893
3894    STMT is 's = s + a[i]', and the reduction variable is 's'.
3895    For a vector of 4 units, we want to return either [0,0,0,init_val],
3896    or [0,0,0,0] and let the caller know that it needs to adjust
3897    the result at the end by 'init_val'.
3898
3899    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3900    initialization vector is simpler (same element in all entries), if
3901    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3902
3903    A cost model should help decide between these two schemes.  */
3904
3905 tree
3906 get_initial_def_for_reduction (gimple *stmt, tree init_val,
3907                                tree *adjustment_def)
3908 {
3909   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3910   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3911   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3912   tree scalar_type = TREE_TYPE (init_val);
3913   tree vectype = get_vectype_for_scalar_type (scalar_type);
3914   int nunits;
3915   enum tree_code code = gimple_assign_rhs_code (stmt);
3916   tree def_for_init;
3917   tree init_def;
3918   tree *elts;
3919   int i;
3920   bool nested_in_vect_loop = false;
3921   REAL_VALUE_TYPE real_init_val = dconst0;
3922   int int_init_val = 0;
3923   gimple *def_stmt = NULL;
3924   gimple_seq stmts = NULL;
3925
3926   gcc_assert (vectype);
3927   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3928
3929   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3930               || SCALAR_FLOAT_TYPE_P (scalar_type));
3931
3932   if (nested_in_vect_loop_p (loop, stmt))
3933     nested_in_vect_loop = true;
3934   else
3935     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3936
3937   /* In case of double reduction we only create a vector variable to be put
3938      in the reduction phi node.  The actual statement creation is done in
3939      vect_create_epilog_for_reduction.  */
3940   if (adjustment_def && nested_in_vect_loop
3941       && TREE_CODE (init_val) == SSA_NAME
3942       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3943       && gimple_code (def_stmt) == GIMPLE_PHI
3944       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3945       && vinfo_for_stmt (def_stmt)
3946       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3947           == vect_double_reduction_def)
3948     {
3949       *adjustment_def = NULL;
3950       return vect_create_destination_var (init_val, vectype);
3951     }
3952
3953   /* In case of a nested reduction do not use an adjustment def as
3954      that case is not supported by the epilogue generation correctly
3955      if ncopies is not one.  */
3956   if (adjustment_def && nested_in_vect_loop)
3957     {
3958       *adjustment_def = NULL;
3959       return vect_get_vec_def_for_operand (init_val, stmt);
3960     }
3961
3962   switch (code)
3963     {
3964       case WIDEN_SUM_EXPR:
3965       case DOT_PROD_EXPR:
3966       case SAD_EXPR:
3967       case PLUS_EXPR:
3968       case MINUS_EXPR:
3969       case BIT_IOR_EXPR:
3970       case BIT_XOR_EXPR:
3971       case MULT_EXPR:
3972       case BIT_AND_EXPR:
3973         /* ADJUSMENT_DEF is NULL when called from
3974            vect_create_epilog_for_reduction to vectorize double reduction.  */
3975         if (adjustment_def)
3976           *adjustment_def = init_val;
3977
3978         if (code == MULT_EXPR)
3979           {
3980             real_init_val = dconst1;
3981             int_init_val = 1;
3982           }
3983
3984         if (code == BIT_AND_EXPR)
3985           int_init_val = -1;
3986
3987         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3988           def_for_init = build_real (scalar_type, real_init_val);
3989         else
3990           def_for_init = build_int_cst (scalar_type, int_init_val);
3991
3992         /* Create a vector of '0' or '1' except the first element.  */
3993         elts = XALLOCAVEC (tree, nunits);
3994         for (i = nunits - 2; i >= 0; --i)
3995           elts[i + 1] = def_for_init;
3996
3997         /* Option1: the first element is '0' or '1' as well.  */
3998         if (adjustment_def)
3999           {
4000             elts[0] = def_for_init;
4001             init_def = build_vector (vectype, elts);
4002             break;
4003           }
4004
4005         /* Option2: the first element is INIT_VAL.  */
4006         elts[0] = init_val;
4007         if (TREE_CONSTANT (init_val))
4008           init_def = build_vector (vectype, elts);
4009         else
4010           {
4011             vec<constructor_elt, va_gc> *v;
4012             vec_alloc (v, nunits);
4013             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4014             for (i = 1; i < nunits; ++i)
4015               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4016             init_def = build_constructor (vectype, v);
4017           }
4018
4019         break;
4020
4021       case MIN_EXPR:
4022       case MAX_EXPR:
4023       case COND_EXPR:
4024         if (adjustment_def)
4025           {
4026             *adjustment_def = NULL_TREE;
4027             if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4028               {
4029                 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4030                 break;
4031               }
4032           }
4033         init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4034         if (! gimple_seq_empty_p (stmts))
4035           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4036         init_def = build_vector_from_val (vectype, init_val);
4037         break;
4038
4039       default:
4040         gcc_unreachable ();
4041     }
4042
4043   return init_def;
4044 }
4045
4046 /* Get at the initial defs for OP in the reduction SLP_NODE.
4047    NUMBER_OF_VECTORS is the number of vector defs to create.
4048    REDUC_INDEX is the index of the reduction operand in the statements.  */
4049
4050 static void
4051 get_initial_defs_for_reduction (slp_tree slp_node,
4052                                 vec<tree> *vec_oprnds,
4053                                 unsigned int number_of_vectors,
4054                                 int reduc_index, enum tree_code code)
4055 {
4056   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4057   gimple *stmt = stmts[0];
4058   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
4059   unsigned nunits;
4060   tree vec_cst;
4061   tree *elts;
4062   unsigned j, number_of_places_left_in_vector;
4063   tree vector_type, scalar_type;
4064   tree vop;
4065   int group_size = stmts.length ();
4066   unsigned int vec_num, i;
4067   unsigned number_of_copies = 1;
4068   vec<tree> voprnds;
4069   voprnds.create (number_of_vectors);
4070   bool constant_p;
4071   tree neutral_op = NULL;
4072   gimple *def_stmt;
4073   struct loop *loop;
4074   gimple_seq ctor_seq = NULL;
4075
4076   vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
4077   scalar_type = TREE_TYPE (vector_type);
4078   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
4079
4080   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
4081               && reduc_index != -1);
4082
4083   /* op is the reduction operand of the first stmt already.  */
4084   /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
4085      we need either neutral operands or the original operands.  See
4086      get_initial_def_for_reduction() for details.  */
4087   switch (code)
4088     {
4089     case WIDEN_SUM_EXPR:
4090     case DOT_PROD_EXPR:
4091     case SAD_EXPR:
4092     case PLUS_EXPR:
4093     case MINUS_EXPR:
4094     case BIT_IOR_EXPR:
4095     case BIT_XOR_EXPR:
4096       neutral_op = build_zero_cst (scalar_type);
4097       break;
4098
4099     case MULT_EXPR:
4100       neutral_op = build_one_cst (scalar_type);
4101       break;
4102
4103     case BIT_AND_EXPR:
4104       neutral_op = build_all_ones_cst (scalar_type);
4105       break;
4106
4107     /* For MIN/MAX we don't have an easy neutral operand but
4108        the initial values can be used fine here.  Only for
4109        a reduction chain we have to force a neutral element.  */
4110     case MAX_EXPR:
4111     case MIN_EXPR:
4112       if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
4113         neutral_op = NULL;
4114       else
4115         {
4116           tree op = get_reduction_op (stmts[0], reduc_index);
4117           def_stmt = SSA_NAME_DEF_STMT (op);
4118           loop = (gimple_bb (stmt))->loop_father;
4119           neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4120                                               loop_preheader_edge (loop));
4121         }
4122       break;
4123
4124     default:
4125       gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
4126       neutral_op = NULL;
4127     }
4128
4129   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4130      created vectors. It is greater than 1 if unrolling is performed.
4131
4132      For example, we have two scalar operands, s1 and s2 (e.g., group of
4133      strided accesses of size two), while NUNITS is four (i.e., four scalars
4134      of this type can be packed in a vector).  The output vector will contain
4135      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
4136      will be 2).
4137
4138      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
4139      containing the operands.
4140
4141      For example, NUNITS is four as before, and the group size is 8
4142      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
4143      {s5, s6, s7, s8}.  */
4144
4145   number_of_copies = nunits * number_of_vectors / group_size;
4146
4147   number_of_places_left_in_vector = nunits;
4148   constant_p = true;
4149   elts = XALLOCAVEC (tree, nunits);
4150   for (j = 0; j < number_of_copies; j++)
4151     {
4152       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
4153         {
4154           tree op = get_reduction_op (stmt, reduc_index);
4155           loop = (gimple_bb (stmt))->loop_father;
4156           def_stmt = SSA_NAME_DEF_STMT (op);
4157
4158           gcc_assert (loop);
4159
4160           /* Get the def before the loop.  In reduction chain we have only
4161              one initial value.  */
4162           if ((j != (number_of_copies - 1)
4163                || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4164                    && i != 0))
4165               && neutral_op)
4166             op = neutral_op;
4167           else
4168             op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4169                                         loop_preheader_edge (loop));
4170
4171           /* Create 'vect_ = {op0,op1,...,opn}'.  */
4172           number_of_places_left_in_vector--;
4173           elts[number_of_places_left_in_vector] = op;
4174           if (!CONSTANT_CLASS_P (op))
4175             constant_p = false;
4176
4177           if (number_of_places_left_in_vector == 0)
4178             {
4179               if (constant_p)
4180                 vec_cst = build_vector (vector_type, elts);
4181               else
4182                 {
4183                   vec<constructor_elt, va_gc> *v;
4184                   unsigned k;
4185                   vec_alloc (v, nunits);
4186                   for (k = 0; k < nunits; ++k)
4187                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
4188                   vec_cst = build_constructor (vector_type, v);
4189                 }
4190               tree init;
4191               gimple_stmt_iterator gsi;
4192               init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
4193               if (ctor_seq != NULL)
4194                 {
4195                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
4196                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
4197                                                         GSI_SAME_STMT);
4198                   ctor_seq = NULL;
4199                 }
4200               voprnds.quick_push (init);
4201
4202               number_of_places_left_in_vector = nunits;
4203               constant_p = true;
4204             }
4205         }
4206     }
4207
4208   /* Since the vectors are created in the reverse order, we should invert
4209      them.  */
4210   vec_num = voprnds.length ();
4211   for (j = vec_num; j != 0; j--)
4212     {
4213       vop = voprnds[j - 1];
4214       vec_oprnds->quick_push (vop);
4215     }
4216
4217   voprnds.release ();
4218
4219   /* In case that VF is greater than the unrolling factor needed for the SLP
4220      group of stmts, NUMBER_OF_VECTORS to be created is greater than
4221      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
4222      to replicate the vectors.  */
4223   while (number_of_vectors > vec_oprnds->length ())
4224     {
4225       tree neutral_vec = NULL;
4226
4227       if (neutral_op)
4228         {
4229           if (!neutral_vec)
4230             neutral_vec = build_vector_from_val (vector_type, neutral_op);
4231
4232           vec_oprnds->quick_push (neutral_vec);
4233         }
4234       else
4235         {
4236           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
4237             vec_oprnds->quick_push (vop);
4238         }
4239     }
4240 }
4241
4242
4243 /* Function vect_create_epilog_for_reduction
4244
4245    Create code at the loop-epilog to finalize the result of a reduction
4246    computation.
4247
4248    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of the
4249      vector reduction statements.
4250    STMT is the scalar reduction stmt that is being vectorized.
4251    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4252      number of elements that we can fit in a vectype (nunits).  In this case
4253      we have to generate more than one vector stmt - i.e - we need to "unroll"
4254      the vector stmt by a factor VF/nunits.  For more details see documentation
4255      in vectorizable_operation.
4256    REDUC_CODE is the tree-code for the epilog reduction.
4257    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4258      computation.
4259    REDUC_INDEX is the index of the operand in the right hand side of the
4260      statement that is defined by REDUCTION_PHI.
4261    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4262    SLP_NODE is an SLP node containing a group of reduction statements. The
4263      first one in this group is STMT.
4264
4265    This function:
4266    1. Creates the reduction def-use cycles: sets the arguments for
4267       REDUCTION_PHIS:
4268       The loop-entry argument is the vectorized initial-value of the reduction.
4269       The loop-latch argument is taken from VECT_DEFS - the vector of partial
4270       sums.
4271    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4272       by applying the operation specified by REDUC_CODE if available, or by
4273       other means (whole-vector shifts or a scalar loop).
4274       The function also creates a new phi node at the loop exit to preserve
4275       loop-closed form, as illustrated below.
4276
4277      The flow at the entry to this function:
4278
4279         loop:
4280           vec_def = phi <null, null>            # REDUCTION_PHI
4281           VECT_DEF = vector_stmt                # vectorized form of STMT
4282           s_loop = scalar_stmt                  # (scalar) STMT
4283         loop_exit:
4284           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4285           use <s_out0>
4286           use <s_out0>
4287
4288      The above is transformed by this function into:
4289
4290         loop:
4291           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4292           VECT_DEF = vector_stmt                # vectorized form of STMT
4293           s_loop = scalar_stmt                  # (scalar) STMT
4294         loop_exit:
4295           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4296           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4297           v_out2 = reduce <v_out1>
4298           s_out3 = extract_field <v_out2, 0>
4299           s_out4 = adjust_result <s_out3>
4300           use <s_out4>
4301           use <s_out4>
4302 */
4303
4304 static void
4305 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4306                                   gimple *reduc_def_stmt,
4307                                   int ncopies, enum tree_code reduc_code,
4308                                   vec<gimple *> reduction_phis,
4309                                   int reduc_index, bool double_reduc,
4310                                   slp_tree slp_node)
4311 {
4312   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4313   stmt_vec_info prev_phi_info;
4314   tree vectype;
4315   machine_mode mode;
4316   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4317   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4318   basic_block exit_bb;
4319   tree scalar_dest;
4320   tree scalar_type;
4321   gimple *new_phi = NULL, *phi;
4322   gimple_stmt_iterator exit_gsi;
4323   tree vec_dest;
4324   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4325   gimple *epilog_stmt = NULL;
4326   enum tree_code code = gimple_assign_rhs_code (stmt);
4327   gimple *exit_phi;
4328   tree bitsize;
4329   tree adjustment_def = NULL;
4330   tree vec_initial_def = NULL;
4331   tree expr, def, initial_def = NULL;
4332   tree orig_name, scalar_result;
4333   imm_use_iterator imm_iter, phi_imm_iter;
4334   use_operand_p use_p, phi_use_p;
4335   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4336   bool nested_in_vect_loop = false;
4337   auto_vec<gimple *> new_phis;
4338   auto_vec<gimple *> inner_phis;
4339   enum vect_def_type dt = vect_unknown_def_type;
4340   int j, i;
4341   auto_vec<tree> scalar_results;
4342   unsigned int group_size = 1, k, ratio;
4343   auto_vec<tree> vec_initial_defs;
4344   auto_vec<gimple *> phis;
4345   bool slp_reduc = false;
4346   tree new_phi_result;
4347   gimple *inner_phi = NULL;
4348   tree induction_index = NULL_TREE;
4349
4350   if (slp_node)
4351     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4352
4353   if (nested_in_vect_loop_p (loop, stmt))
4354     {
4355       outer_loop = loop;
4356       loop = loop->inner;
4357       nested_in_vect_loop = true;
4358       gcc_assert (!slp_node);
4359     }
4360
4361   vectype = STMT_VINFO_VECTYPE (stmt_info);
4362   gcc_assert (vectype);
4363   mode = TYPE_MODE (vectype);
4364
4365   /* 1. Create the reduction def-use cycle:
4366      Set the arguments of REDUCTION_PHIS, i.e., transform
4367
4368         loop:
4369           vec_def = phi <null, null>            # REDUCTION_PHI
4370           VECT_DEF = vector_stmt                # vectorized form of STMT
4371           ...
4372
4373      into:
4374
4375         loop:
4376           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4377           VECT_DEF = vector_stmt                # vectorized form of STMT
4378           ...
4379
4380      (in case of SLP, do it for all the phis). */
4381
4382   /* Get the loop-entry arguments.  */
4383   enum vect_def_type initial_def_dt = vect_unknown_def_type;
4384   if (slp_node)
4385     {
4386       unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4387       vec_initial_defs.reserve (vec_num);
4388       get_initial_defs_for_reduction (slp_node, &vec_initial_defs,
4389                                       vec_num, reduc_index, code);
4390     }
4391   else
4392     {
4393       /* Get at the scalar def before the loop, that defines the initial value
4394          of the reduction variable.  */
4395       gimple *def_stmt;
4396       initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4397                                            loop_preheader_edge (loop));
4398       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
4399       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4400                                                        &adjustment_def);
4401       vec_initial_defs.create (1);
4402       vec_initial_defs.quick_push (vec_initial_def);
4403     }
4404
4405   /* Set phi nodes arguments.  */
4406   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4407     {
4408       tree vec_init_def, def;
4409       gimple_seq stmts;
4410       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4411                                            true, NULL_TREE);
4412       if (stmts)
4413         gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4414
4415       def = vect_defs[i];
4416       for (j = 0; j < ncopies; j++)
4417         {
4418           if (j != 0)
4419             {
4420               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4421               if (nested_in_vect_loop)
4422                 vec_init_def
4423                   = vect_get_vec_def_for_stmt_copy (initial_def_dt,
4424                                                     vec_init_def);
4425             }
4426
4427           /* Set the loop-entry arg of the reduction-phi.  */
4428
4429           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4430               == INTEGER_INDUC_COND_REDUCTION)
4431             {
4432               /* Initialise the reduction phi to zero.  This prevents initial
4433                  values of non-zero interferring with the reduction op.  */
4434               gcc_assert (ncopies == 1);
4435               gcc_assert (i == 0);
4436
4437               tree vec_init_def_type = TREE_TYPE (vec_init_def);
4438               tree zero_vec = build_zero_cst (vec_init_def_type);
4439
4440               add_phi_arg (as_a <gphi *> (phi), zero_vec,
4441                            loop_preheader_edge (loop), UNKNOWN_LOCATION);
4442             }
4443           else
4444             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4445                          loop_preheader_edge (loop), UNKNOWN_LOCATION);
4446
4447           /* Set the loop-latch arg for the reduction-phi.  */
4448           if (j > 0)
4449             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4450
4451           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4452                        UNKNOWN_LOCATION);
4453
4454           if (dump_enabled_p ())
4455             {
4456               dump_printf_loc (MSG_NOTE, vect_location,
4457                                "transform reduction: created def-use cycle: ");
4458               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4459               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4460             }
4461         }
4462     }
4463
4464   /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
4465      which is updated with the current index of the loop for every match of
4466      the original loop's cond_expr (VEC_STMT).  This results in a vector
4467      containing the last time the condition passed for that vector lane.
4468      The first match will be a 1 to allow 0 to be used for non-matching
4469      indexes.  If there are no matches at all then the vector will be all
4470      zeroes.  */
4471   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4472     {
4473       tree indx_before_incr, indx_after_incr;
4474       int nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
4475       int k;
4476
4477       gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4478       gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
4479
4480       int scalar_precision
4481         = GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (vectype)));
4482       tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
4483       tree cr_index_vector_type = build_vector_type
4484         (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype));
4485
4486       /* First we create a simple vector induction variable which starts
4487          with the values {1,2,3,...} (SERIES_VECT) and increments by the
4488          vector size (STEP).  */
4489
4490       /* Create a {1,2,3,...} vector.  */
4491       tree *vtemp = XALLOCAVEC (tree, nunits_out);
4492       for (k = 0; k < nunits_out; ++k)
4493         vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
4494       tree series_vect = build_vector (cr_index_vector_type, vtemp);
4495
4496       /* Create a vector of the step value.  */
4497       tree step = build_int_cst (cr_index_scalar_type, nunits_out);
4498       tree vec_step = build_vector_from_val (cr_index_vector_type, step);
4499
4500       /* Create an induction variable.  */
4501       gimple_stmt_iterator incr_gsi;
4502       bool insert_after;
4503       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4504       create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
4505                  insert_after, &indx_before_incr, &indx_after_incr);
4506
4507       /* Next create a new phi node vector (NEW_PHI_TREE) which starts
4508          filled with zeros (VEC_ZERO).  */
4509
4510       /* Create a vector of 0s.  */
4511       tree zero = build_zero_cst (cr_index_scalar_type);
4512       tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
4513
4514       /* Create a vector phi node.  */
4515       tree new_phi_tree = make_ssa_name (cr_index_vector_type);
4516       new_phi = create_phi_node (new_phi_tree, loop->header);
4517       set_vinfo_for_stmt (new_phi,
4518                           new_stmt_vec_info (new_phi, loop_vinfo));
4519       add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
4520                    loop_preheader_edge (loop), UNKNOWN_LOCATION);
4521
4522       /* Now take the condition from the loops original cond_expr
4523          (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
4524          every match uses values from the induction variable
4525          (INDEX_BEFORE_INCR) otherwise uses values from the phi node
4526          (NEW_PHI_TREE).
4527          Finally, we update the phi (NEW_PHI_TREE) to take the value of
4528          the new cond_expr (INDEX_COND_EXPR).  */
4529
4530       /* Duplicate the condition from vec_stmt.  */
4531       tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
4532
4533       /* Create a conditional, where the condition is taken from vec_stmt
4534          (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
4535          else is the phi (NEW_PHI_TREE).  */
4536       tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
4537                                      ccompare, indx_before_incr,
4538                                      new_phi_tree);
4539       induction_index = make_ssa_name (cr_index_vector_type);
4540       gimple *index_condition = gimple_build_assign (induction_index,
4541                                                      index_cond_expr);
4542       gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
4543       stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
4544                                                         loop_vinfo);
4545       STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
4546       set_vinfo_for_stmt (index_condition, index_vec_info);
4547
4548       /* Update the phi with the vec cond.  */
4549       add_phi_arg (as_a <gphi *> (new_phi), induction_index,
4550                    loop_latch_edge (loop), UNKNOWN_LOCATION);
4551     }
4552
4553   /* 2. Create epilog code.
4554         The reduction epilog code operates across the elements of the vector
4555         of partial results computed by the vectorized loop.
4556         The reduction epilog code consists of:
4557
4558         step 1: compute the scalar result in a vector (v_out2)
4559         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4560         step 3: adjust the scalar result (s_out3) if needed.
4561
4562         Step 1 can be accomplished using one the following three schemes:
4563           (scheme 1) using reduc_code, if available.
4564           (scheme 2) using whole-vector shifts, if available.
4565           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4566                      combined.
4567
4568           The overall epilog code looks like this:
4569
4570           s_out0 = phi <s_loop>         # original EXIT_PHI
4571           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4572           v_out2 = reduce <v_out1>              # step 1
4573           s_out3 = extract_field <v_out2, 0>    # step 2
4574           s_out4 = adjust_result <s_out3>       # step 3
4575
4576           (step 3 is optional, and steps 1 and 2 may be combined).
4577           Lastly, the uses of s_out0 are replaced by s_out4.  */
4578
4579
4580   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4581          v_out1 = phi <VECT_DEF>
4582          Store them in NEW_PHIS.  */
4583
4584   exit_bb = single_exit (loop)->dest;
4585   prev_phi_info = NULL;
4586   new_phis.create (vect_defs.length ());
4587   FOR_EACH_VEC_ELT (vect_defs, i, def)
4588     {
4589       for (j = 0; j < ncopies; j++)
4590         {
4591           tree new_def = copy_ssa_name (def);
4592           phi = create_phi_node (new_def, exit_bb);
4593           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4594           if (j == 0)
4595             new_phis.quick_push (phi);
4596           else
4597             {
4598               def = vect_get_vec_def_for_stmt_copy (dt, def);
4599               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4600             }
4601
4602           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4603           prev_phi_info = vinfo_for_stmt (phi);
4604         }
4605     }
4606
4607   /* The epilogue is created for the outer-loop, i.e., for the loop being
4608      vectorized.  Create exit phis for the outer loop.  */
4609   if (double_reduc)
4610     {
4611       loop = outer_loop;
4612       exit_bb = single_exit (loop)->dest;
4613       inner_phis.create (vect_defs.length ());
4614       FOR_EACH_VEC_ELT (new_phis, i, phi)
4615         {
4616           tree new_result = copy_ssa_name (PHI_RESULT (phi));
4617           gphi *outer_phi = create_phi_node (new_result, exit_bb);
4618           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4619                            PHI_RESULT (phi));
4620           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4621                                                             loop_vinfo));
4622           inner_phis.quick_push (phi);
4623           new_phis[i] = outer_phi;
4624           prev_phi_info = vinfo_for_stmt (outer_phi);
4625           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4626             {
4627               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4628               new_result = copy_ssa_name (PHI_RESULT (phi));
4629               outer_phi = create_phi_node (new_result, exit_bb);
4630               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4631                                PHI_RESULT (phi));
4632               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4633                                                                 loop_vinfo));
4634               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4635               prev_phi_info = vinfo_for_stmt (outer_phi);
4636             }
4637         }
4638     }
4639
4640   exit_gsi = gsi_after_labels (exit_bb);
4641
4642   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4643          (i.e. when reduc_code is not available) and in the final adjustment
4644          code (if needed).  Also get the original scalar reduction variable as
4645          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4646          represents a reduction pattern), the tree-code and scalar-def are
4647          taken from the original stmt that the pattern-stmt (STMT) replaces.
4648          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4649          are taken from STMT.  */
4650
4651   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4652   if (!orig_stmt)
4653     {
4654       /* Regular reduction  */
4655       orig_stmt = stmt;
4656     }
4657   else
4658     {
4659       /* Reduction pattern  */
4660       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4661       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4662       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4663     }
4664
4665   code = gimple_assign_rhs_code (orig_stmt);
4666   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4667      partial results are added and not subtracted.  */
4668   if (code == MINUS_EXPR)
4669     code = PLUS_EXPR;
4670
4671   scalar_dest = gimple_assign_lhs (orig_stmt);
4672   scalar_type = TREE_TYPE (scalar_dest);
4673   scalar_results.create (group_size);
4674   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4675   bitsize = TYPE_SIZE (scalar_type);
4676
4677   /* In case this is a reduction in an inner-loop while vectorizing an outer
4678      loop - we don't need to extract a single scalar result at the end of the
4679      inner-loop (unless it is double reduction, i.e., the use of reduction is
4680      outside the outer-loop).  The final vector of partial results will be used
4681      in the vectorized outer-loop, or reduced to a scalar result at the end of
4682      the outer-loop.  */
4683   if (nested_in_vect_loop && !double_reduc)
4684     goto vect_finalize_reduction;
4685
4686   /* SLP reduction without reduction chain, e.g.,
4687      # a1 = phi <a2, a0>
4688      # b1 = phi <b2, b0>
4689      a2 = operation (a1)
4690      b2 = operation (b1)  */
4691   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4692
4693   /* In case of reduction chain, e.g.,
4694      # a1 = phi <a3, a0>
4695      a2 = operation (a1)
4696      a3 = operation (a2),
4697
4698      we may end up with more than one vector result.  Here we reduce them to
4699      one vector.  */
4700   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4701     {
4702       tree first_vect = PHI_RESULT (new_phis[0]);
4703       tree tmp;
4704       gassign *new_vec_stmt = NULL;
4705
4706       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4707       for (k = 1; k < new_phis.length (); k++)
4708         {
4709           gimple *next_phi = new_phis[k];
4710           tree second_vect = PHI_RESULT (next_phi);
4711
4712           tmp = build2 (code, vectype,  first_vect, second_vect);
4713           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4714           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4715           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4716           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4717         }
4718
4719       new_phi_result = first_vect;
4720       if (new_vec_stmt)
4721         {
4722           new_phis.truncate (0);
4723           new_phis.safe_push (new_vec_stmt);
4724         }
4725     }
4726   else
4727     new_phi_result = PHI_RESULT (new_phis[0]);
4728
4729   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
4730       && reduc_code != ERROR_MARK)
4731     {
4732       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4733          various data values where the condition matched and another vector
4734          (INDUCTION_INDEX) containing all the indexes of those matches.  We
4735          need to extract the last matching index (which will be the index with
4736          highest value) and use this to index into the data vector.
4737          For the case where there were no matches, the data vector will contain
4738          all default values and the index vector will be all zeros.  */
4739
4740       /* Get various versions of the type of the vector of indexes.  */
4741       tree index_vec_type = TREE_TYPE (induction_index);
4742       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4743       tree index_scalar_type = TREE_TYPE (index_vec_type);
4744       tree index_vec_cmp_type = build_same_sized_truth_vector_type
4745         (index_vec_type);
4746
4747       /* Get an unsigned integer version of the type of the data vector.  */
4748       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4749       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4750       tree vectype_unsigned = build_vector_type
4751         (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4752
4753       /* First we need to create a vector (ZERO_VEC) of zeros and another
4754          vector (MAX_INDEX_VEC) filled with the last matching index, which we
4755          can create using a MAX reduction and then expanding.
4756          In the case where the loop never made any matches, the max index will
4757          be zero.  */
4758
4759       /* Vector of {0, 0, 0,...}.  */
4760       tree zero_vec = make_ssa_name (vectype);
4761       tree zero_vec_rhs = build_zero_cst (vectype);
4762       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4763       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4764
4765       /* Find maximum value from the vector of found indexes.  */
4766       tree max_index = make_ssa_name (index_scalar_type);
4767       gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4768                                                     induction_index);
4769       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4770
4771       /* Vector of {max_index, max_index, max_index,...}.  */
4772       tree max_index_vec = make_ssa_name (index_vec_type);
4773       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4774                                                       max_index);
4775       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4776                                                         max_index_vec_rhs);
4777       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4778
4779       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4780          with the vector (INDUCTION_INDEX) of found indexes, choosing values
4781          from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4782          otherwise.  Only one value should match, resulting in a vector
4783          (VEC_COND) with one data value and the rest zeros.
4784          In the case where the loop never made any matches, every index will
4785          match, resulting in a vector with all data values (which will all be
4786          the default value).  */
4787
4788       /* Compare the max index vector to the vector of found indexes to find
4789          the position of the max value.  */
4790       tree vec_compare = make_ssa_name (index_vec_cmp_type);
4791       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4792                                                       induction_index,
4793                                                       max_index_vec);
4794       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4795
4796       /* Use the compare to choose either values from the data vector or
4797          zero.  */
4798       tree vec_cond = make_ssa_name (vectype);
4799       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4800                                                    vec_compare, new_phi_result,
4801                                                    zero_vec);
4802       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4803
4804       /* Finally we need to extract the data value from the vector (VEC_COND)
4805          into a scalar (MATCHED_DATA_REDUC).  Logically we want to do an OR
4806          reduction, but because this doesn't exist, we can use a MAX reduction
4807          instead.  The data value might be signed or a float so we need to cast
4808          it first.
4809          In the case where the loop never made any matches, the data values are
4810          all identical, and so will reduce down correctly.  */
4811
4812       /* Make the matched data values unsigned.  */
4813       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4814       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4815                                        vec_cond);
4816       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4817                                                         VIEW_CONVERT_EXPR,
4818                                                         vec_cond_cast_rhs);
4819       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4820
4821       /* Reduce down to a scalar value.  */
4822       tree data_reduc = make_ssa_name (scalar_type_unsigned);
4823       optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4824                                       optab_default);
4825       gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4826                   != CODE_FOR_nothing);
4827       gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4828                                                      REDUC_MAX_EXPR,
4829                                                      vec_cond_cast);
4830       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4831
4832       /* Convert the reduced value back to the result type and set as the
4833          result.  */
4834       gimple_seq stmts = NULL;
4835       new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type,
4836                                data_reduc);
4837       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
4838       scalar_results.safe_push (new_temp);
4839     }
4840   else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
4841            && reduc_code == ERROR_MARK)
4842     {
4843       /* Condition reduction without supported REDUC_MAX_EXPR.  Generate
4844          idx = 0;
4845          idx_val = induction_index[0];
4846          val = data_reduc[0];
4847          for (idx = 0, val = init, i = 0; i < nelts; ++i)
4848            if (induction_index[i] > idx_val)
4849              val = data_reduc[i], idx_val = induction_index[i];
4850          return val;  */
4851
4852       tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
4853       tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
4854       unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
4855       unsigned HOST_WIDE_INT v_size
4856         = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
4857       tree idx_val = NULL_TREE, val = NULL_TREE;
4858       for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
4859         {
4860           tree old_idx_val = idx_val;
4861           tree old_val = val;
4862           idx_val = make_ssa_name (idx_eltype);
4863           epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF,
4864                                              build3 (BIT_FIELD_REF, idx_eltype,
4865                                                      induction_index,
4866                                                      bitsize_int (el_size),
4867                                                      bitsize_int (off)));
4868           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4869           val = make_ssa_name (data_eltype);
4870           epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF,
4871                                              build3 (BIT_FIELD_REF,
4872                                                      data_eltype,
4873                                                      new_phi_result,
4874                                                      bitsize_int (el_size),
4875                                                      bitsize_int (off)));
4876           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4877           if (off != 0)
4878             {
4879               tree new_idx_val = idx_val;
4880               tree new_val = val;
4881               if (off != v_size - el_size)
4882                 {
4883                   new_idx_val = make_ssa_name (idx_eltype);
4884                   epilog_stmt = gimple_build_assign (new_idx_val,
4885                                                      MAX_EXPR, idx_val,
4886                                                      old_idx_val);
4887                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4888                 }
4889               new_val = make_ssa_name (data_eltype);
4890               epilog_stmt = gimple_build_assign (new_val,
4891                                                  COND_EXPR,
4892                                                  build2 (GT_EXPR,
4893                                                          boolean_type_node,
4894                                                          idx_val,
4895                                                          old_idx_val),
4896                                                  val, old_val);
4897               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4898               idx_val = new_idx_val;
4899               val = new_val;
4900             }
4901         }
4902       /* Convert the reduced value back to the result type and set as the
4903          result.  */
4904       gimple_seq stmts = NULL;
4905       val = gimple_convert (&stmts, scalar_type, val);
4906       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
4907       scalar_results.safe_push (val);
4908     }
4909
4910   /* 2.3 Create the reduction code, using one of the three schemes described
4911          above. In SLP we simply need to extract all the elements from the
4912          vector (without reducing them), so we use scalar shifts.  */
4913   else if (reduc_code != ERROR_MARK && !slp_reduc)
4914     {
4915       tree tmp;
4916       tree vec_elem_type;
4917
4918       /* Case 1:  Create:
4919          v_out2 = reduc_expr <v_out1>  */
4920
4921       if (dump_enabled_p ())
4922         dump_printf_loc (MSG_NOTE, vect_location,
4923                          "Reduce using direct vector reduction.\n");
4924
4925       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4926       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4927         {
4928           tree tmp_dest =
4929               vect_create_destination_var (scalar_dest, vec_elem_type);
4930           tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4931           epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4932           new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4933           gimple_assign_set_lhs (epilog_stmt, new_temp);
4934           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4935
4936           tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4937         }
4938       else
4939         tmp = build1 (reduc_code, scalar_type, new_phi_result);
4940
4941       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4942       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4943       gimple_assign_set_lhs (epilog_stmt, new_temp);
4944       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4945
4946       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4947           == INTEGER_INDUC_COND_REDUCTION)
4948         {
4949           /* Earlier we set the initial value to be zero.  Check the result
4950              and if it is zero then replace with the original initial
4951              value.  */
4952           tree zero = build_zero_cst (scalar_type);
4953           tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4954
4955           tmp = make_ssa_name (new_scalar_dest);
4956           epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4957                                              initial_def, new_temp);
4958           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4959           new_temp = tmp;
4960         }
4961
4962       scalar_results.safe_push (new_temp);
4963     }
4964   else
4965     {
4966       bool reduce_with_shift = have_whole_vector_shift (mode);
4967       int element_bitsize = tree_to_uhwi (bitsize);
4968       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4969       tree vec_temp;
4970
4971       /* COND reductions all do the final reduction with MAX_EXPR.  */
4972       if (code == COND_EXPR)
4973         code = MAX_EXPR;
4974
4975       /* Regardless of whether we have a whole vector shift, if we're
4976          emulating the operation via tree-vect-generic, we don't want
4977          to use it.  Only the first round of the reduction is likely
4978          to still be profitable via emulation.  */
4979       /* ??? It might be better to emit a reduction tree code here, so that
4980          tree-vect-generic can expand the first round via bit tricks.  */
4981       if (!VECTOR_MODE_P (mode))
4982         reduce_with_shift = false;
4983       else
4984         {
4985           optab optab = optab_for_tree_code (code, vectype, optab_default);
4986           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4987             reduce_with_shift = false;
4988         }
4989
4990       if (reduce_with_shift && !slp_reduc)
4991         {
4992           int nelements = vec_size_in_bits / element_bitsize;
4993           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4994
4995           int elt_offset;
4996
4997           tree zero_vec = build_zero_cst (vectype);
4998           /* Case 2: Create:
4999              for (offset = nelements/2; offset >= 1; offset/=2)
5000                 {
5001                   Create:  va' = vec_shift <va, offset>
5002                   Create:  va = vop <va, va'>
5003                 }  */
5004
5005           tree rhs;
5006
5007           if (dump_enabled_p ())
5008             dump_printf_loc (MSG_NOTE, vect_location,
5009                              "Reduce using vector shifts\n");
5010
5011           vec_dest = vect_create_destination_var (scalar_dest, vectype);
5012           new_temp = new_phi_result;
5013           for (elt_offset = nelements / 2;
5014                elt_offset >= 1;
5015                elt_offset /= 2)
5016             {
5017               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
5018               tree mask = vect_gen_perm_mask_any (vectype, sel);
5019               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
5020                                                  new_temp, zero_vec, mask);
5021               new_name = make_ssa_name (vec_dest, epilog_stmt);
5022               gimple_assign_set_lhs (epilog_stmt, new_name);
5023               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5024
5025               epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
5026                                                  new_temp);
5027               new_temp = make_ssa_name (vec_dest, epilog_stmt);
5028               gimple_assign_set_lhs (epilog_stmt, new_temp);
5029               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5030             }
5031
5032           /* 2.4  Extract the final scalar result.  Create:
5033              s_out3 = extract_field <v_out2, bitpos>  */
5034
5035           if (dump_enabled_p ())
5036             dump_printf_loc (MSG_NOTE, vect_location,
5037                              "extract scalar result\n");
5038
5039           rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5040                         bitsize, bitsize_zero_node);
5041           epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5042           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5043           gimple_assign_set_lhs (epilog_stmt, new_temp);
5044           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5045           scalar_results.safe_push (new_temp);
5046         }
5047       else
5048         {
5049           /* Case 3: Create:
5050              s = extract_field <v_out2, 0>
5051              for (offset = element_size;
5052                   offset < vector_size;
5053                   offset += element_size;)
5054                {
5055                  Create:  s' = extract_field <v_out2, offset>
5056                  Create:  s = op <s, s'>  // For non SLP cases
5057                }  */
5058
5059           if (dump_enabled_p ())
5060             dump_printf_loc (MSG_NOTE, vect_location,
5061                              "Reduce using scalar code.\n");
5062
5063           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
5064           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
5065             {
5066               int bit_offset;
5067               if (gimple_code (new_phi) == GIMPLE_PHI)
5068                 vec_temp = PHI_RESULT (new_phi);
5069               else
5070                 vec_temp = gimple_assign_lhs (new_phi);
5071               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
5072                             bitsize_zero_node);
5073               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5074               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5075               gimple_assign_set_lhs (epilog_stmt, new_temp);
5076               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5077
5078               /* In SLP we don't need to apply reduction operation, so we just
5079                  collect s' values in SCALAR_RESULTS.  */
5080               if (slp_reduc)
5081                 scalar_results.safe_push (new_temp);
5082
5083               for (bit_offset = element_bitsize;
5084                    bit_offset < vec_size_in_bits;
5085                    bit_offset += element_bitsize)
5086                 {
5087                   tree bitpos = bitsize_int (bit_offset);
5088                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
5089                                      bitsize, bitpos);
5090
5091                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5092                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
5093                   gimple_assign_set_lhs (epilog_stmt, new_name);
5094                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5095
5096                   if (slp_reduc)
5097                     {
5098                       /* In SLP we don't need to apply reduction operation, so
5099                          we just collect s' values in SCALAR_RESULTS.  */
5100                       new_temp = new_name;
5101                       scalar_results.safe_push (new_name);
5102                     }
5103                   else
5104                     {
5105                       epilog_stmt = gimple_build_assign (new_scalar_dest, code,
5106                                                          new_name, new_temp);
5107                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5108                       gimple_assign_set_lhs (epilog_stmt, new_temp);
5109                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5110                     }
5111                 }
5112             }
5113
5114           /* The only case where we need to reduce scalar results in SLP, is
5115              unrolling.  If the size of SCALAR_RESULTS is greater than
5116              GROUP_SIZE, we reduce them combining elements modulo
5117              GROUP_SIZE.  */
5118           if (slp_reduc)
5119             {
5120               tree res, first_res, new_res;
5121               gimple *new_stmt;
5122
5123               /* Reduce multiple scalar results in case of SLP unrolling.  */
5124               for (j = group_size; scalar_results.iterate (j, &res);
5125                    j++)
5126                 {
5127                   first_res = scalar_results[j % group_size];
5128                   new_stmt = gimple_build_assign (new_scalar_dest, code,
5129                                                   first_res, res);
5130                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
5131                   gimple_assign_set_lhs (new_stmt, new_res);
5132                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
5133                   scalar_results[j % group_size] = new_res;
5134                 }
5135             }
5136           else
5137             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
5138             scalar_results.safe_push (new_temp);
5139         }
5140
5141       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5142           == INTEGER_INDUC_COND_REDUCTION)
5143         {
5144           /* Earlier we set the initial value to be zero.  Check the result
5145              and if it is zero then replace with the original initial
5146              value.  */
5147           tree zero = build_zero_cst (scalar_type);
5148           tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
5149
5150           tree tmp = make_ssa_name (new_scalar_dest);
5151           epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
5152                                              initial_def, new_temp);
5153           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5154           scalar_results[0] = tmp;
5155         }
5156     }
5157
5158 vect_finalize_reduction:
5159
5160   if (double_reduc)
5161     loop = loop->inner;
5162
5163   /* 2.5 Adjust the final result by the initial value of the reduction
5164          variable. (When such adjustment is not needed, then
5165          'adjustment_def' is zero).  For example, if code is PLUS we create:
5166          new_temp = loop_exit_def + adjustment_def  */
5167
5168   if (adjustment_def)
5169     {
5170       gcc_assert (!slp_reduc);
5171       if (nested_in_vect_loop)
5172         {
5173           new_phi = new_phis[0];
5174           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
5175           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
5176           new_dest = vect_create_destination_var (scalar_dest, vectype);
5177         }
5178       else
5179         {
5180           new_temp = scalar_results[0];
5181           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
5182           expr = build2 (code, scalar_type, new_temp, adjustment_def);
5183           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
5184         }
5185
5186       epilog_stmt = gimple_build_assign (new_dest, expr);
5187       new_temp = make_ssa_name (new_dest, epilog_stmt);
5188       gimple_assign_set_lhs (epilog_stmt, new_temp);
5189       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5190       if (nested_in_vect_loop)
5191         {
5192           set_vinfo_for_stmt (epilog_stmt,
5193                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
5194           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
5195                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
5196
5197           if (!double_reduc)
5198             scalar_results.quick_push (new_temp);
5199           else
5200             scalar_results[0] = new_temp;
5201         }
5202       else
5203         scalar_results[0] = new_temp;
5204
5205       new_phis[0] = epilog_stmt;
5206     }
5207
5208   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
5209           phis with new adjusted scalar results, i.e., replace use <s_out0>
5210           with use <s_out4>.
5211
5212      Transform:
5213         loop_exit:
5214           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5215           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5216           v_out2 = reduce <v_out1>
5217           s_out3 = extract_field <v_out2, 0>
5218           s_out4 = adjust_result <s_out3>
5219           use <s_out0>
5220           use <s_out0>
5221
5222      into:
5223
5224         loop_exit:
5225           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5226           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5227           v_out2 = reduce <v_out1>
5228           s_out3 = extract_field <v_out2, 0>
5229           s_out4 = adjust_result <s_out3>
5230           use <s_out4>
5231           use <s_out4> */
5232
5233
5234   /* In SLP reduction chain we reduce vector results into one vector if
5235      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
5236      the last stmt in the reduction chain, since we are looking for the loop
5237      exit phi node.  */
5238   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
5239     {
5240       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5241       /* Handle reduction patterns.  */
5242       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
5243         dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5244
5245       scalar_dest = gimple_assign_lhs (dest_stmt);
5246       group_size = 1;
5247     }
5248
5249   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5250      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
5251      need to match SCALAR_RESULTS with corresponding statements.  The first
5252      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
5253      the first vector stmt, etc.
5254      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
5255   if (group_size > new_phis.length ())
5256     {
5257       ratio = group_size / new_phis.length ();
5258       gcc_assert (!(group_size % new_phis.length ()));
5259     }
5260   else
5261     ratio = 1;
5262
5263   for (k = 0; k < group_size; k++)
5264     {
5265       if (k % ratio == 0)
5266         {
5267           epilog_stmt = new_phis[k / ratio];
5268           reduction_phi = reduction_phis[k / ratio];
5269           if (double_reduc)
5270             inner_phi = inner_phis[k / ratio];
5271         }
5272
5273       if (slp_reduc)
5274         {
5275           gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5276
5277           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
5278           /* SLP statements can't participate in patterns.  */
5279           gcc_assert (!orig_stmt);
5280           scalar_dest = gimple_assign_lhs (current_stmt);
5281         }
5282
5283       phis.create (3);
5284       /* Find the loop-closed-use at the loop exit of the original scalar
5285          result.  (The reduction result is expected to have two immediate uses -
5286          one at the latch block, and one at the loop exit).  */
5287       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5288         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
5289             && !is_gimple_debug (USE_STMT (use_p)))
5290           phis.safe_push (USE_STMT (use_p));
5291
5292       /* While we expect to have found an exit_phi because of loop-closed-ssa
5293          form we can end up without one if the scalar cycle is dead.  */
5294
5295       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5296         {
5297           if (outer_loop)
5298             {
5299               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5300               gphi *vect_phi;
5301
5302               /* FORNOW. Currently not supporting the case that an inner-loop
5303                  reduction is not used in the outer-loop (but only outside the
5304                  outer-loop), unless it is double reduction.  */
5305               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5306                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5307                           || double_reduc);
5308
5309               if (double_reduc)
5310                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5311               else
5312                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5313               if (!double_reduc
5314                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5315                       != vect_double_reduction_def)
5316                 continue;
5317
5318               /* Handle double reduction:
5319
5320                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
5321                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5322                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
5323                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
5324
5325                  At that point the regular reduction (stmt2 and stmt3) is
5326                  already vectorized, as well as the exit phi node, stmt4.
5327                  Here we vectorize the phi node of double reduction, stmt1, and
5328                  update all relevant statements.  */
5329
5330               /* Go through all the uses of s2 to find double reduction phi
5331                  node, i.e., stmt1 above.  */
5332               orig_name = PHI_RESULT (exit_phi);
5333               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5334                 {
5335                   stmt_vec_info use_stmt_vinfo;
5336                   stmt_vec_info new_phi_vinfo;
5337                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5338                   basic_block bb = gimple_bb (use_stmt);
5339                   gimple *use;
5340
5341                   /* Check that USE_STMT is really double reduction phi
5342                      node.  */
5343                   if (gimple_code (use_stmt) != GIMPLE_PHI
5344                       || gimple_phi_num_args (use_stmt) != 2
5345                       || bb->loop_father != outer_loop)
5346                     continue;
5347                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5348                   if (!use_stmt_vinfo
5349                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5350                           != vect_double_reduction_def)
5351                     continue;
5352
5353                   /* Create vector phi node for double reduction:
5354                      vs1 = phi <vs0, vs2>
5355                      vs1 was created previously in this function by a call to
5356                        vect_get_vec_def_for_operand and is stored in
5357                        vec_initial_def;
5358                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5359                      vs0 is created here.  */
5360
5361                   /* Create vector phi node.  */
5362                   vect_phi = create_phi_node (vec_initial_def, bb);
5363                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
5364                                     loop_vec_info_for_loop (outer_loop));
5365                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5366
5367                   /* Create vs0 - initial def of the double reduction phi.  */
5368                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5369                                              loop_preheader_edge (outer_loop));
5370                   init_def = get_initial_def_for_reduction (stmt,
5371                                                           preheader_arg, NULL);
5372                   vect_phi_init = vect_init_vector (use_stmt, init_def,
5373                                                     vectype, NULL);
5374
5375                   /* Update phi node arguments with vs0 and vs2.  */
5376                   add_phi_arg (vect_phi, vect_phi_init,
5377                                loop_preheader_edge (outer_loop),
5378                                UNKNOWN_LOCATION);
5379                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5380                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5381                   if (dump_enabled_p ())
5382                     {
5383                       dump_printf_loc (MSG_NOTE, vect_location,
5384                                        "created double reduction phi node: ");
5385                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5386                     }
5387
5388                   vect_phi_res = PHI_RESULT (vect_phi);
5389
5390                   /* Replace the use, i.e., set the correct vs1 in the regular
5391                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
5392                      loop is redundant.  */
5393                   use = reduction_phi;
5394                   for (j = 0; j < ncopies; j++)
5395                     {
5396                       edge pr_edge = loop_preheader_edge (loop);
5397                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5398                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5399                     }
5400                 }
5401             }
5402         }
5403
5404       phis.release ();
5405       if (nested_in_vect_loop)
5406         {
5407           if (double_reduc)
5408             loop = outer_loop;
5409           else
5410             continue;
5411         }
5412
5413       phis.create (3);
5414       /* Find the loop-closed-use at the loop exit of the original scalar
5415          result.  (The reduction result is expected to have two immediate uses,
5416          one at the latch block, and one at the loop exit).  For double
5417          reductions we are looking for exit phis of the outer loop.  */
5418       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5419         {
5420           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5421             {
5422               if (!is_gimple_debug (USE_STMT (use_p)))
5423                 phis.safe_push (USE_STMT (use_p));
5424             }
5425           else
5426             {
5427               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5428                 {
5429                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
5430
5431                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5432                     {
5433                       if (!flow_bb_inside_loop_p (loop,
5434                                              gimple_bb (USE_STMT (phi_use_p)))
5435                           && !is_gimple_debug (USE_STMT (phi_use_p)))
5436                         phis.safe_push (USE_STMT (phi_use_p));
5437                     }
5438                 }
5439             }
5440         }
5441
5442       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5443         {
5444           /* Replace the uses:  */
5445           orig_name = PHI_RESULT (exit_phi);
5446           scalar_result = scalar_results[k];
5447           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5448             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5449               SET_USE (use_p, scalar_result);
5450         }
5451
5452       phis.release ();
5453     }
5454 }
5455
5456
5457 /* Function is_nonwrapping_integer_induction.
5458
5459    Check if STMT (which is part of loop LOOP) both increments and
5460    does not cause overflow.  */
5461
5462 static bool
5463 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5464 {
5465   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5466   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5467   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5468   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5469   widest_int ni, max_loop_value, lhs_max;
5470   bool overflow = false;
5471
5472   /* Make sure the loop is integer based.  */
5473   if (TREE_CODE (base) != INTEGER_CST
5474       || TREE_CODE (step) != INTEGER_CST)
5475     return false;
5476
5477   /* Check that the induction increments.  */
5478   if (tree_int_cst_sgn (step) == -1)
5479     return false;
5480
5481   /* Check that the max size of the loop will not wrap.  */
5482
5483   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5484     return true;
5485
5486   if (! max_stmt_executions (loop, &ni))
5487     return false;
5488
5489   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5490                             &overflow);
5491   if (overflow)
5492     return false;
5493
5494   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5495                             TYPE_SIGN (lhs_type), &overflow);
5496   if (overflow)
5497     return false;
5498
5499   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5500           <= TYPE_PRECISION (lhs_type));
5501 }
5502
5503 /* Function vectorizable_reduction.
5504
5505    Check if STMT performs a reduction operation that can be vectorized.
5506    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5507    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5508    Return FALSE if not a vectorizable STMT, TRUE otherwise.
5509
5510    This function also handles reduction idioms (patterns) that have been
5511    recognized in advance during vect_pattern_recog.  In this case, STMT may be
5512    of this form:
5513      X = pattern_expr (arg0, arg1, ..., X)
5514    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5515    sequence that had been detected and replaced by the pattern-stmt (STMT).
5516
5517    This function also handles reduction of condition expressions, for example:
5518      for (int i = 0; i < N; i++)
5519        if (a[i] < value)
5520          last = a[i];
5521    This is handled by vectorising the loop and creating an additional vector
5522    containing the loop indexes for which "a[i] < value" was true.  In the
5523    function epilogue this is reduced to a single max value and then used to
5524    index into the vector of results.
5525
5526    In some cases of reduction patterns, the type of the reduction variable X is
5527    different than the type of the other arguments of STMT.
5528    In such cases, the vectype that is used when transforming STMT into a vector
5529    stmt is different than the vectype that is used to determine the
5530    vectorization factor, because it consists of a different number of elements
5531    than the actual number of elements that are being operated upon in parallel.
5532
5533    For example, consider an accumulation of shorts into an int accumulator.
5534    On some targets it's possible to vectorize this pattern operating on 8
5535    shorts at a time (hence, the vectype for purposes of determining the
5536    vectorization factor should be V8HI); on the other hand, the vectype that
5537    is used to create the vector form is actually V4SI (the type of the result).
5538
5539    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5540    indicates what is the actual level of parallelism (V8HI in the example), so
5541    that the right vectorization factor would be derived.  This vectype
5542    corresponds to the type of arguments to the reduction stmt, and should *NOT*
5543    be used to create the vectorized stmt.  The right vectype for the vectorized
5544    stmt is obtained from the type of the result X:
5545         get_vectype_for_scalar_type (TREE_TYPE (X))
5546
5547    This means that, contrary to "regular" reductions (or "regular" stmts in
5548    general), the following equation:
5549       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5550    does *NOT* necessarily hold for reduction patterns.  */
5551
5552 bool
5553 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5554                         gimple **vec_stmt, slp_tree slp_node)
5555 {
5556   tree vec_dest;
5557   tree scalar_dest;
5558   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5559   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5560   tree vectype_in = NULL_TREE;
5561   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5562   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5563   enum tree_code code, orig_code, epilog_reduc_code;
5564   machine_mode vec_mode;
5565   int op_type;
5566   optab optab, reduc_optab;
5567   tree new_temp = NULL_TREE;
5568   gimple *def_stmt;
5569   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
5570   gphi *new_phi = NULL;
5571   tree scalar_type;
5572   bool is_simple_use;
5573   gimple *orig_stmt;
5574   stmt_vec_info orig_stmt_info = NULL;
5575   int i;
5576   int ncopies;
5577   int epilog_copies;
5578   stmt_vec_info prev_stmt_info, prev_phi_info;
5579   bool single_defuse_cycle = false;
5580   gimple *new_stmt = NULL;
5581   int j;
5582   tree ops[3];
5583   enum vect_def_type dts[3];
5584   bool nested_cycle = false, found_nested_cycle_def = false;
5585   bool double_reduc = false;
5586   basic_block def_bb;
5587   struct loop * def_stmt_loop, *outer_loop = NULL;
5588   tree def_arg;
5589   gimple *def_arg_stmt;
5590   auto_vec<tree> vec_oprnds0;
5591   auto_vec<tree> vec_oprnds1;
5592   auto_vec<tree> vec_oprnds2;
5593   auto_vec<tree> vect_defs;
5594   auto_vec<gimple *> phis;
5595   int vec_num;
5596   tree def0, tem;
5597   bool first_p = true;
5598   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5599   tree cond_reduc_val = NULL_TREE;
5600
5601   /* Make sure it was already recognized as a reduction computation.  */
5602   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5603       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5604     return false;
5605
5606   if (nested_in_vect_loop_p (loop, stmt))
5607     {
5608       outer_loop = loop;
5609       loop = loop->inner;
5610       nested_cycle = true;
5611     }
5612
5613   /* In case of reduction chain we switch to the first stmt in the chain, but
5614      we don't update STMT_INFO, since only the last stmt is marked as reduction
5615      and has reduction properties.  */
5616   if (GROUP_FIRST_ELEMENT (stmt_info)
5617       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5618     {
5619       stmt = GROUP_FIRST_ELEMENT (stmt_info);
5620       first_p = false;
5621     }
5622
5623   if (gimple_code (stmt) == GIMPLE_PHI)
5624     {
5625       /* Analysis is fully done on the reduction stmt invocation.  */
5626       if (! vec_stmt)
5627         {
5628           STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5629           return true;
5630         }
5631
5632       gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
5633       if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
5634         reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
5635
5636       gcc_assert (is_gimple_assign (reduc_stmt));
5637       for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
5638         {
5639           tree op = gimple_op (reduc_stmt, k);
5640           if (op == gimple_phi_result (stmt))
5641             continue;
5642           if (k == 1
5643               && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
5644             continue;
5645           vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
5646           break;
5647         }
5648       gcc_assert (vectype_in);
5649
5650       if (slp_node)
5651         ncopies = 1;
5652       else
5653         ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5654                    / TYPE_VECTOR_SUBPARTS (vectype_in));
5655
5656       use_operand_p use_p;
5657       gimple *use_stmt;
5658       if (ncopies > 1
5659           && (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt))
5660               <= vect_used_only_live)
5661           && single_imm_use (gimple_phi_result (stmt), &use_p, &use_stmt)
5662           && (use_stmt == reduc_stmt
5663               || (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt))
5664                   == reduc_stmt)))
5665         single_defuse_cycle = true;
5666
5667       /* Create the destination vector  */
5668       scalar_dest = gimple_assign_lhs (reduc_stmt);
5669       vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5670
5671       if (slp_node)
5672         /* The size vect_schedule_slp_instance computes is off for us.  */
5673         vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5674                     * SLP_TREE_SCALAR_STMTS (slp_node).length ())
5675                    / TYPE_VECTOR_SUBPARTS (vectype_in));
5676       else
5677         vec_num = 1;
5678
5679       /* Generate the reduction PHIs upfront.  */
5680       prev_phi_info = NULL;
5681       for (j = 0; j < ncopies; j++)
5682         {
5683           if (j == 0 || !single_defuse_cycle)
5684             {
5685               for (i = 0; i < vec_num; i++)
5686                 {
5687                   /* Create the reduction-phi that defines the reduction
5688                      operand.  */
5689                   new_phi = create_phi_node (vec_dest, loop->header);
5690                   set_vinfo_for_stmt (new_phi,
5691                                       new_stmt_vec_info (new_phi, loop_vinfo));
5692
5693                   if (slp_node)
5694                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi);
5695                   else
5696                     {
5697                       if (j == 0)
5698                         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi;
5699                       else
5700                         STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5701                       prev_phi_info = vinfo_for_stmt (new_phi);
5702                     }
5703                 }
5704             }
5705         }
5706
5707       return true;
5708     }
5709
5710   /* 1. Is vectorizable reduction?  */
5711   /* Not supportable if the reduction variable is used in the loop, unless
5712      it's a reduction chain.  */
5713   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5714       && !GROUP_FIRST_ELEMENT (stmt_info))
5715     return false;
5716
5717   /* Reductions that are not used even in an enclosing outer-loop,
5718      are expected to be "live" (used out of the loop).  */
5719   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5720       && !STMT_VINFO_LIVE_P (stmt_info))
5721     return false;
5722
5723   /* 2. Has this been recognized as a reduction pattern?
5724
5725      Check if STMT represents a pattern that has been recognized
5726      in earlier analysis stages.  For stmts that represent a pattern,
5727      the STMT_VINFO_RELATED_STMT field records the last stmt in
5728      the original sequence that constitutes the pattern.  */
5729
5730   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5731   if (orig_stmt)
5732     {
5733       orig_stmt_info = vinfo_for_stmt (orig_stmt);
5734       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5735       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5736     }
5737
5738   /* 3. Check the operands of the operation.  The first operands are defined
5739         inside the loop body. The last operand is the reduction variable,
5740         which is defined by the loop-header-phi.  */
5741
5742   gcc_assert (is_gimple_assign (stmt));
5743
5744   /* Flatten RHS.  */
5745   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5746     {
5747     case GIMPLE_BINARY_RHS:
5748       code = gimple_assign_rhs_code (stmt);
5749       op_type = TREE_CODE_LENGTH (code);
5750       gcc_assert (op_type == binary_op);
5751       ops[0] = gimple_assign_rhs1 (stmt);
5752       ops[1] = gimple_assign_rhs2 (stmt);
5753       break;
5754
5755     case GIMPLE_TERNARY_RHS:
5756       code = gimple_assign_rhs_code (stmt);
5757       op_type = TREE_CODE_LENGTH (code);
5758       gcc_assert (op_type == ternary_op);
5759       ops[0] = gimple_assign_rhs1 (stmt);
5760       ops[1] = gimple_assign_rhs2 (stmt);
5761       ops[2] = gimple_assign_rhs3 (stmt);
5762       break;
5763
5764     case GIMPLE_UNARY_RHS:
5765       return false;
5766
5767     default:
5768       gcc_unreachable ();
5769     }
5770
5771   if (code == COND_EXPR && slp_node)
5772     return false;
5773
5774   scalar_dest = gimple_assign_lhs (stmt);
5775   scalar_type = TREE_TYPE (scalar_dest);
5776   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5777       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5778     return false;
5779
5780   /* Do not try to vectorize bit-precision reductions.  */
5781   if ((TYPE_PRECISION (scalar_type)
5782        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5783     return false;
5784
5785   /* All uses but the last are expected to be defined in the loop.
5786      The last use is the reduction variable.  In case of nested cycle this
5787      assumption is not true: we use reduc_index to record the index of the
5788      reduction variable.  */
5789   gimple *reduc_def_stmt = NULL;
5790   int reduc_index = -1;
5791   for (i = 0; i < op_type; i++)
5792     {
5793       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5794       if (i == 0 && code == COND_EXPR)
5795         continue;
5796
5797       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5798                                           &def_stmt, &dts[i], &tem);
5799       dt = dts[i];
5800       gcc_assert (is_simple_use);
5801       if (dt == vect_reduction_def)
5802         {
5803           reduc_def_stmt = def_stmt;
5804           reduc_index = i;
5805           continue;
5806         }
5807       else
5808         {
5809           if (!vectype_in)
5810             vectype_in = tem;
5811         }
5812
5813       if (dt != vect_internal_def
5814           && dt != vect_external_def
5815           && dt != vect_constant_def
5816           && dt != vect_induction_def
5817           && !(dt == vect_nested_cycle && nested_cycle))
5818         return false;
5819
5820       if (dt == vect_nested_cycle)
5821         {
5822           found_nested_cycle_def = true;
5823           reduc_def_stmt = def_stmt;
5824           reduc_index = i;
5825         }
5826
5827       if (i == 1 && code == COND_EXPR)
5828         {
5829           /* Record how value of COND_EXPR is defined.  */
5830           if (dt == vect_constant_def)
5831             {
5832               cond_reduc_dt = dt;
5833               cond_reduc_val = ops[i];
5834             }
5835           if (dt == vect_induction_def && def_stmt != NULL
5836               && is_nonwrapping_integer_induction (def_stmt, loop))
5837             cond_reduc_dt = dt;
5838         }
5839     }
5840
5841   if (!vectype_in)
5842     vectype_in = vectype_out;
5843
5844   /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
5845      directly used in stmt.  */
5846   if (reduc_index == -1)
5847     {
5848       if (orig_stmt)
5849         reduc_def_stmt = STMT_VINFO_REDUC_DEF (orig_stmt_info);
5850       else
5851         reduc_def_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
5852     }
5853
5854   if (! reduc_def_stmt || gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5855     return false;
5856
5857   if (!(reduc_index == -1
5858         || dts[reduc_index] == vect_reduction_def
5859         || dts[reduc_index] == vect_nested_cycle
5860         || ((dts[reduc_index] == vect_internal_def
5861              || dts[reduc_index] == vect_external_def
5862              || dts[reduc_index] == vect_constant_def
5863              || dts[reduc_index] == vect_induction_def)
5864             && nested_cycle && found_nested_cycle_def)))
5865     {
5866       /* For pattern recognized stmts, orig_stmt might be a reduction,
5867          but some helper statements for the pattern might not, or
5868          might be COND_EXPRs with reduction uses in the condition.  */
5869       gcc_assert (orig_stmt);
5870       return false;
5871     }
5872
5873   stmt_vec_info reduc_def_info = vinfo_for_stmt (reduc_def_stmt);
5874   enum vect_reduction_type v_reduc_type
5875     = STMT_VINFO_REDUC_TYPE (reduc_def_info);
5876   gimple *tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
5877
5878   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5879   /* If we have a condition reduction, see if we can simplify it further.  */
5880   if (v_reduc_type == COND_REDUCTION)
5881     {
5882       if (cond_reduc_dt == vect_induction_def)
5883         {
5884           if (dump_enabled_p ())
5885             dump_printf_loc (MSG_NOTE, vect_location,
5886                              "condition expression based on "
5887                              "integer induction.\n");
5888           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5889             = INTEGER_INDUC_COND_REDUCTION;
5890         }
5891
5892       /* Loop peeling modifies initial value of reduction PHI, which
5893          makes the reduction stmt to be transformed different to the
5894          original stmt analyzed.  We need to record reduction code for
5895          CONST_COND_REDUCTION type reduction at analyzing stage, thus
5896          it can be used directly at transform stage.  */
5897       if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
5898           || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
5899         {
5900           /* Also set the reduction type to CONST_COND_REDUCTION.  */
5901           gcc_assert (cond_reduc_dt == vect_constant_def);
5902           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
5903         }
5904       else if (cond_reduc_dt == vect_constant_def)
5905         {
5906           enum vect_def_type cond_initial_dt;
5907           gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
5908           tree cond_initial_val
5909             = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
5910
5911           gcc_assert (cond_reduc_val != NULL_TREE);
5912           vect_is_simple_use (cond_initial_val, loop_vinfo,
5913                               &def_stmt, &cond_initial_dt);
5914           if (cond_initial_dt == vect_constant_def
5915               && types_compatible_p (TREE_TYPE (cond_initial_val),
5916                                      TREE_TYPE (cond_reduc_val)))
5917             {
5918               tree e = fold_binary (LE_EXPR, boolean_type_node,
5919                                     cond_initial_val, cond_reduc_val);
5920               if (e && (integer_onep (e) || integer_zerop (e)))
5921                 {
5922                   if (dump_enabled_p ())
5923                     dump_printf_loc (MSG_NOTE, vect_location,
5924                                      "condition expression based on "
5925                                      "compile time constant.\n");
5926                   /* Record reduction code at analysis stage.  */
5927                   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
5928                     = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
5929                   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5930                     = CONST_COND_REDUCTION;
5931                 }
5932             }
5933         }
5934     }
5935
5936   if (orig_stmt)
5937     gcc_assert (tmp == orig_stmt
5938                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5939   else
5940     /* We changed STMT to be the first stmt in reduction chain, hence we
5941        check that in this case the first element in the chain is STMT.  */
5942     gcc_assert (stmt == tmp
5943                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5944
5945   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5946     return false;
5947
5948   if (slp_node)
5949     ncopies = 1;
5950   else
5951     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5952                / TYPE_VECTOR_SUBPARTS (vectype_in));
5953
5954   gcc_assert (ncopies >= 1);
5955
5956   vec_mode = TYPE_MODE (vectype_in);
5957
5958   if (code == COND_EXPR)
5959     {
5960       /* Only call during the analysis stage, otherwise we'll lose
5961          STMT_VINFO_TYPE.  */
5962       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5963                                                 ops[reduc_index], 0, NULL))
5964         {
5965           if (dump_enabled_p ())
5966             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5967                              "unsupported condition in reduction\n");
5968           return false;
5969         }
5970     }
5971   else
5972     {
5973       /* 4. Supportable by target?  */
5974
5975       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5976           || code == LROTATE_EXPR || code == RROTATE_EXPR)
5977         {
5978           /* Shifts and rotates are only supported by vectorizable_shifts,
5979              not vectorizable_reduction.  */
5980           if (dump_enabled_p ())
5981             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5982                              "unsupported shift or rotation.\n");
5983           return false;
5984         }
5985
5986       /* 4.1. check support for the operation in the loop  */
5987       optab = optab_for_tree_code (code, vectype_in, optab_default);
5988       if (!optab)
5989         {
5990           if (dump_enabled_p ())
5991             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5992                              "no optab.\n");
5993
5994           return false;
5995         }
5996
5997       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5998         {
5999           if (dump_enabled_p ())
6000             dump_printf (MSG_NOTE, "op not supported by target.\n");
6001
6002           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
6003               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6004                   < vect_min_worthwhile_factor (code))
6005             return false;
6006
6007           if (dump_enabled_p ())
6008             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
6009         }
6010
6011       /* Worthwhile without SIMD support?  */
6012       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
6013           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6014              < vect_min_worthwhile_factor (code))
6015         {
6016           if (dump_enabled_p ())
6017             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6018                              "not worthwhile without SIMD support.\n");
6019
6020           return false;
6021         }
6022     }
6023
6024   /* 4.2. Check support for the epilog operation.
6025
6026           If STMT represents a reduction pattern, then the type of the
6027           reduction variable may be different than the type of the rest
6028           of the arguments.  For example, consider the case of accumulation
6029           of shorts into an int accumulator; The original code:
6030                         S1: int_a = (int) short_a;
6031           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
6032
6033           was replaced with:
6034                         STMT: int_acc = widen_sum <short_a, int_acc>
6035
6036           This means that:
6037           1. The tree-code that is used to create the vector operation in the
6038              epilog code (that reduces the partial results) is not the
6039              tree-code of STMT, but is rather the tree-code of the original
6040              stmt from the pattern that STMT is replacing.  I.e, in the example
6041              above we want to use 'widen_sum' in the loop, but 'plus' in the
6042              epilog.
6043           2. The type (mode) we use to check available target support
6044              for the vector operation to be created in the *epilog*, is
6045              determined by the type of the reduction variable (in the example
6046              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
6047              However the type (mode) we use to check available target support
6048              for the vector operation to be created *inside the loop*, is
6049              determined by the type of the other arguments to STMT (in the
6050              example we'd check this: optab_handler (widen_sum_optab,
6051              vect_short_mode)).
6052
6053           This is contrary to "regular" reductions, in which the types of all
6054           the arguments are the same as the type of the reduction variable.
6055           For "regular" reductions we can therefore use the same vector type
6056           (and also the same tree-code) when generating the epilog code and
6057           when generating the code inside the loop.  */
6058
6059   if (orig_stmt)
6060     {
6061       /* This is a reduction pattern: get the vectype from the type of the
6062          reduction variable, and get the tree-code from orig_stmt.  */
6063       gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
6064                   == TREE_CODE_REDUCTION);
6065       orig_code = gimple_assign_rhs_code (orig_stmt);
6066       gcc_assert (vectype_out);
6067       vec_mode = TYPE_MODE (vectype_out);
6068     }
6069   else
6070     {
6071       /* Regular reduction: use the same vectype and tree-code as used for
6072          the vector code inside the loop can be used for the epilog code. */
6073       orig_code = code;
6074
6075       if (code == MINUS_EXPR)
6076         orig_code = PLUS_EXPR;
6077
6078       /* For simple condition reductions, replace with the actual expression
6079          we want to base our reduction around.  */
6080       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
6081         {
6082           orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
6083           gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
6084         }
6085       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
6086                  == INTEGER_INDUC_COND_REDUCTION)
6087         orig_code = MAX_EXPR;
6088     }
6089
6090   if (nested_cycle)
6091     {
6092       def_bb = gimple_bb (reduc_def_stmt);
6093       def_stmt_loop = def_bb->loop_father;
6094       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
6095                                        loop_preheader_edge (def_stmt_loop));
6096       if (TREE_CODE (def_arg) == SSA_NAME
6097           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
6098           && gimple_code (def_arg_stmt) == GIMPLE_PHI
6099           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
6100           && vinfo_for_stmt (def_arg_stmt)
6101           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
6102               == vect_double_reduction_def)
6103         double_reduc = true;
6104     }
6105
6106   epilog_reduc_code = ERROR_MARK;
6107
6108   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
6109     {
6110       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
6111         {
6112           reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
6113                                          optab_default);
6114           if (!reduc_optab)
6115             {
6116               if (dump_enabled_p ())
6117                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6118                                  "no optab for reduction.\n");
6119
6120               epilog_reduc_code = ERROR_MARK;
6121             }
6122           else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
6123             {
6124               if (dump_enabled_p ())
6125                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6126                                  "reduc op not supported by target.\n");
6127
6128               epilog_reduc_code = ERROR_MARK;
6129             }
6130         }
6131       else
6132         {
6133           if (!nested_cycle || double_reduc)
6134             {
6135               if (dump_enabled_p ())
6136                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6137                                  "no reduc code for scalar code.\n");
6138
6139               return false;
6140             }
6141         }
6142     }
6143   else
6144     {
6145       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
6146       cr_index_scalar_type = make_unsigned_type (scalar_precision);
6147       cr_index_vector_type = build_vector_type
6148         (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
6149
6150       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
6151                                    optab_default);
6152       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
6153           != CODE_FOR_nothing)
6154         epilog_reduc_code = REDUC_MAX_EXPR;
6155     }
6156
6157   if ((double_reduc
6158        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
6159       && ncopies > 1)
6160     {
6161       if (dump_enabled_p ())
6162         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6163                          "multiple types in double reduction or condition "
6164                          "reduction.\n");
6165       return false;
6166     }
6167
6168   /* In case of widenning multiplication by a constant, we update the type
6169      of the constant to be the type of the other operand.  We check that the
6170      constant fits the type in the pattern recognition pass.  */
6171   if (code == DOT_PROD_EXPR
6172       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
6173     {
6174       if (TREE_CODE (ops[0]) == INTEGER_CST)
6175         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
6176       else if (TREE_CODE (ops[1]) == INTEGER_CST)
6177         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
6178       else
6179         {
6180           if (dump_enabled_p ())
6181             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6182                              "invalid types in dot-prod\n");
6183
6184           return false;
6185         }
6186     }
6187
6188   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6189     {
6190       widest_int ni;
6191
6192       if (! max_loop_iterations (loop, &ni))
6193         {
6194           if (dump_enabled_p ())
6195             dump_printf_loc (MSG_NOTE, vect_location,
6196                              "loop count not known, cannot create cond "
6197                              "reduction.\n");
6198           return false;
6199         }
6200       /* Convert backedges to iterations.  */
6201       ni += 1;
6202
6203       /* The additional index will be the same type as the condition.  Check
6204          that the loop can fit into this less one (because we'll use up the
6205          zero slot for when there are no matches).  */
6206       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
6207       if (wi::geu_p (ni, wi::to_widest (max_index)))
6208         {
6209           if (dump_enabled_p ())
6210             dump_printf_loc (MSG_NOTE, vect_location,
6211                              "loop size is greater than data size.\n");
6212           return false;
6213         }
6214     }
6215
6216   if (!vec_stmt) /* transformation not required.  */
6217     {
6218       if (first_p)
6219         vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
6220       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
6221       return true;
6222     }
6223
6224   /* Transform.  */
6225
6226   if (dump_enabled_p ())
6227     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
6228
6229   /* FORNOW: Multiple types are not supported for condition.  */
6230   if (code == COND_EXPR)
6231     gcc_assert (ncopies == 1);
6232
6233   /* Create the destination vector  */
6234   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6235
6236   /* In case the vectorization factor (VF) is bigger than the number
6237      of elements that we can fit in a vectype (nunits), we have to generate
6238      more than one vector stmt - i.e - we need to "unroll" the
6239      vector stmt by a factor VF/nunits.  For more details see documentation
6240      in vectorizable_operation.  */
6241
6242   /* If the reduction is used in an outer loop we need to generate
6243      VF intermediate results, like so (e.g. for ncopies=2):
6244         r0 = phi (init, r0)
6245         r1 = phi (init, r1)
6246         r0 = x0 + r0;
6247         r1 = x1 + r1;
6248     (i.e. we generate VF results in 2 registers).
6249     In this case we have a separate def-use cycle for each copy, and therefore
6250     for each copy we get the vector def for the reduction variable from the
6251     respective phi node created for this copy.
6252
6253     Otherwise (the reduction is unused in the loop nest), we can combine
6254     together intermediate results, like so (e.g. for ncopies=2):
6255         r = phi (init, r)
6256         r = x0 + r;
6257         r = x1 + r;
6258    (i.e. we generate VF/2 results in a single register).
6259    In this case for each copy we get the vector def for the reduction variable
6260    from the vectorized reduction operation generated in the previous iteration.
6261
6262    This only works when we see both the reduction PHI and its only consumer
6263    in vectorizable_reduction and there are no intermediate stmts
6264    participating.  */
6265   use_operand_p use_p;
6266   gimple *use_stmt;
6267   if (ncopies > 1
6268       && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
6269       && single_imm_use (gimple_phi_result (reduc_def_stmt), &use_p, &use_stmt)
6270       && (use_stmt == stmt
6271           || STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) == stmt))
6272     {
6273       single_defuse_cycle = true;
6274       epilog_copies = 1;
6275     }
6276   else
6277     epilog_copies = ncopies;
6278
6279   prev_stmt_info = NULL;
6280   prev_phi_info = NULL;
6281   if (slp_node)
6282     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6283   else
6284     {
6285       vec_num = 1;
6286       vec_oprnds0.create (1);
6287       vec_oprnds1.create (1);
6288       if (op_type == ternary_op)
6289         vec_oprnds2.create (1);
6290     }
6291
6292   phis.create (vec_num);
6293   vect_defs.create (vec_num);
6294   if (!slp_node)
6295     vect_defs.quick_push (NULL_TREE);
6296
6297   auto_vec<tree> vec_oprnds;
6298   for (j = 0; j < ncopies; j++)
6299     {
6300       if (j == 0 || !single_defuse_cycle)
6301         {
6302           for (i = 0; i < vec_num; i++)
6303             {
6304               /* Get the created reduction-phi that defines the reduction
6305                  operand.  */
6306               tree reduc_def = gimple_phi_result (reduc_def_stmt);
6307               if (j == 0)
6308                 vect_get_vec_defs (reduc_def, NULL, stmt, &vec_oprnds, NULL,
6309                                    slp_node);
6310               else
6311                 {
6312                   dt = vect_reduction_def;
6313                   vect_get_vec_defs_for_stmt_copy (&dt,
6314                                                    &vec_oprnds, NULL);
6315                 }
6316               new_phi = as_a <gphi *> (SSA_NAME_DEF_STMT (vec_oprnds[i]));
6317               if (j == 0 || slp_node)
6318                 phis.quick_push (new_phi);
6319             }
6320         }
6321
6322       if (code == COND_EXPR)
6323         {
6324           gcc_assert (!slp_node);
6325           vectorizable_condition (stmt, gsi, vec_stmt,
6326                                   PHI_RESULT (phis[0]),
6327                                   reduc_index, NULL);
6328           /* Multiple types are not supported for condition.  */
6329           break;
6330         }
6331
6332       /* Handle uses.  */
6333       if (j == 0)
6334         {
6335           if (slp_node)
6336             {
6337               /* Get vec defs for all the operands except the reduction index,
6338                  ensuring the ordering of the ops in the vector is kept.  */
6339               auto_vec<tree, 3> slp_ops;
6340               auto_vec<vec<tree>, 3> vec_defs;
6341
6342               slp_ops.quick_push (ops[0]);
6343               slp_ops.quick_push (ops[1]);
6344               if (op_type == ternary_op)
6345                 slp_ops.quick_push (ops[2]);
6346
6347               vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
6348
6349               vec_oprnds0.safe_splice (vec_defs[0]);
6350               vec_defs[0].release ();
6351               vec_oprnds1.safe_splice (vec_defs[1]);
6352               vec_defs[1].release ();
6353               if (op_type == ternary_op)
6354                 {
6355                   vec_oprnds2.safe_splice (vec_defs[2]);
6356                   vec_defs[2].release ();
6357                 }
6358             }
6359           else
6360             {
6361               vec_oprnds0.quick_push
6362                 (vect_get_vec_def_for_operand (ops[0], stmt));
6363               vec_oprnds1.quick_push
6364                 (vect_get_vec_def_for_operand (ops[1], stmt));
6365               if (op_type == ternary_op)
6366                 vec_oprnds2.quick_push
6367                   (vect_get_vec_def_for_operand (ops[2], stmt));
6368             }
6369         }
6370       else
6371         {
6372           if (!slp_node)
6373             {
6374               gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
6375
6376               if (single_defuse_cycle && reduc_index == 0)
6377                 vec_oprnds0[0] = gimple_assign_lhs (new_stmt);
6378               else
6379                 vec_oprnds0[0]
6380                   = vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]);
6381               if (single_defuse_cycle && reduc_index == 1)
6382                 vec_oprnds1[0] = gimple_assign_lhs (new_stmt);
6383               else
6384                 vec_oprnds1[0]
6385                   = vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]);
6386               if (op_type == ternary_op)
6387                 {
6388                   if (single_defuse_cycle && reduc_index == 2)
6389                     vec_oprnds2[0] = gimple_assign_lhs (new_stmt);
6390                   else
6391                     vec_oprnds2[0]
6392                       = vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]);
6393                 }
6394             }
6395         }
6396
6397       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6398         {
6399           tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
6400           if (op_type == ternary_op)
6401             vop[2] = vec_oprnds2[i];
6402
6403           new_temp = make_ssa_name (vec_dest, new_stmt);
6404           new_stmt = gimple_build_assign (new_temp, code,
6405                                           vop[0], vop[1], vop[2]);
6406           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6407
6408           if (slp_node)
6409             {
6410               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6411               vect_defs.quick_push (new_temp);
6412             }
6413           else
6414             vect_defs[0] = new_temp;
6415         }
6416
6417       if (slp_node)
6418         continue;
6419
6420       if (j == 0)
6421         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6422       else
6423         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6424
6425       prev_stmt_info = vinfo_for_stmt (new_stmt);
6426     }
6427
6428   /* Finalize the reduction-phi (set its arguments) and create the
6429      epilog reduction code.  */
6430   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6431     vect_defs[0] = gimple_assign_lhs (*vec_stmt);
6432
6433   vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
6434                                     epilog_copies,
6435                                     epilog_reduc_code, phis, reduc_index,
6436                                     double_reduc, slp_node);
6437
6438   return true;
6439 }
6440
6441 /* Function vect_min_worthwhile_factor.
6442
6443    For a loop where we could vectorize the operation indicated by CODE,
6444    return the minimum vectorization factor that makes it worthwhile
6445    to use generic vectors.  */
6446 int
6447 vect_min_worthwhile_factor (enum tree_code code)
6448 {
6449   switch (code)
6450     {
6451     case PLUS_EXPR:
6452     case MINUS_EXPR:
6453     case NEGATE_EXPR:
6454       return 4;
6455
6456     case BIT_AND_EXPR:
6457     case BIT_IOR_EXPR:
6458     case BIT_XOR_EXPR:
6459     case BIT_NOT_EXPR:
6460       return 2;
6461
6462     default:
6463       return INT_MAX;
6464     }
6465 }
6466
6467
6468 /* Function vectorizable_induction
6469
6470    Check if PHI performs an induction computation that can be vectorized.
6471    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6472    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6473    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6474
6475 bool
6476 vectorizable_induction (gimple *phi,
6477                         gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6478                         gimple **vec_stmt, slp_tree slp_node)
6479 {
6480   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6481   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6482   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6483   unsigned ncopies;
6484   bool nested_in_vect_loop = false;
6485   struct loop *iv_loop;
6486   tree vec_def;
6487   edge pe = loop_preheader_edge (loop);
6488   basic_block new_bb;
6489   tree new_vec, vec_init, vec_step, t;
6490   tree new_name;
6491   gimple *new_stmt;
6492   gphi *induction_phi;
6493   tree induc_def, vec_dest;
6494   tree init_expr, step_expr;
6495   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6496   unsigned i;
6497   tree expr;
6498   gimple_seq stmts;
6499   imm_use_iterator imm_iter;
6500   use_operand_p use_p;
6501   gimple *exit_phi;
6502   edge latch_e;
6503   tree loop_arg;
6504   gimple_stmt_iterator si;
6505   basic_block bb = gimple_bb (phi);
6506
6507   if (gimple_code (phi) != GIMPLE_PHI)
6508     return false;
6509
6510   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6511     return false;
6512
6513   /* Make sure it was recognized as induction computation.  */
6514   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
6515     return false;
6516
6517   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6518   unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
6519
6520   if (slp_node)
6521     ncopies = 1;
6522   else
6523     ncopies = vf / nunits;
6524   gcc_assert (ncopies >= 1);
6525
6526   /* FORNOW. These restrictions should be relaxed.  */
6527   if (nested_in_vect_loop_p (loop, phi))
6528     {
6529       imm_use_iterator imm_iter;
6530       use_operand_p use_p;
6531       gimple *exit_phi;
6532       edge latch_e;
6533       tree loop_arg;
6534
6535       if (ncopies > 1)
6536         {
6537           if (dump_enabled_p ())
6538             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6539                              "multiple types in nested loop.\n");
6540           return false;
6541         }
6542
6543       /* FORNOW: outer loop induction with SLP not supported.  */
6544       if (STMT_SLP_TYPE (stmt_info))
6545         return false;
6546
6547       exit_phi = NULL;
6548       latch_e = loop_latch_edge (loop->inner);
6549       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6550       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6551         {
6552           gimple *use_stmt = USE_STMT (use_p);
6553           if (is_gimple_debug (use_stmt))
6554             continue;
6555
6556           if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6557             {
6558               exit_phi = use_stmt;
6559               break;
6560             }
6561         }
6562       if (exit_phi)
6563         {
6564           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
6565           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6566                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6567             {
6568               if (dump_enabled_p ())
6569                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6570                                  "inner-loop induction only used outside "
6571                                  "of the outer vectorized loop.\n");
6572               return false;
6573             }
6574         }
6575
6576       nested_in_vect_loop = true;
6577       iv_loop = loop->inner;
6578     }
6579   else
6580     iv_loop = loop;
6581   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
6582
6583   if (!vec_stmt) /* transformation not required.  */
6584     {
6585       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6586       if (dump_enabled_p ())
6587         dump_printf_loc (MSG_NOTE, vect_location,
6588                          "=== vectorizable_induction ===\n");
6589       vect_model_induction_cost (stmt_info, ncopies);
6590       return true;
6591     }
6592
6593   /* Transform.  */
6594
6595   /* Compute a vector variable, initialized with the first VF values of
6596      the induction variable.  E.g., for an iv with IV_PHI='X' and
6597      evolution S, for a vector of 4 units, we want to compute:
6598      [X, X + S, X + 2*S, X + 3*S].  */
6599
6600   if (dump_enabled_p ())
6601     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6602
6603   latch_e = loop_latch_edge (iv_loop);
6604   loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6605
6606   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
6607   gcc_assert (step_expr != NULL_TREE);
6608
6609   pe = loop_preheader_edge (iv_loop);
6610   init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
6611                                      loop_preheader_edge (iv_loop));
6612
6613   /* Convert the step to the desired type.  */
6614   stmts = NULL;
6615   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
6616   if (stmts)
6617     {
6618       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6619       gcc_assert (!new_bb);
6620     }
6621
6622   /* Find the first insertion point in the BB.  */
6623   si = gsi_after_labels (bb);
6624
6625   /* For SLP induction we have to generate several IVs as for example
6626      with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
6627      [i + 2*S, i + 3*S, i + 3*S, i + 3*S].  The step is the same uniform
6628      [VF*S, VF*S, VF*S, VF*S] for all.  */
6629   if (slp_node)
6630     {
6631       /* Convert the init to the desired type.  */
6632       stmts = NULL;
6633       init_expr = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6634       if (stmts)
6635         {
6636           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6637           gcc_assert (!new_bb);
6638         }
6639
6640       /* Generate [VF*S, VF*S, ... ].  */
6641       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6642         {
6643           expr = build_int_cst (integer_type_node, vf);
6644           expr = fold_convert (TREE_TYPE (step_expr), expr);
6645         }
6646       else
6647         expr = build_int_cst (TREE_TYPE (step_expr), vf);
6648       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6649                               expr, step_expr);
6650       if (! CONSTANT_CLASS_P (new_name))
6651         new_name = vect_init_vector (phi, new_name,
6652                                      TREE_TYPE (step_expr), NULL);
6653       new_vec = build_vector_from_val (vectype, new_name);
6654       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6655
6656       /* Now generate the IVs.  */
6657       unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6658       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6659       unsigned elts = nunits * nvects;
6660       unsigned nivs = least_common_multiple (group_size, nunits) / nunits;
6661       gcc_assert (elts % group_size == 0);
6662       tree elt = init_expr;
6663       unsigned ivn;
6664       for (ivn = 0; ivn < nivs; ++ivn)
6665         {
6666           tree *elts = XALLOCAVEC (tree, nunits);
6667           bool constant_p = true;
6668           for (unsigned eltn = 0; eltn < nunits; ++eltn)
6669             {
6670               if (ivn*nunits + eltn >= group_size
6671                   && (ivn*nunits + eltn) % group_size == 0)
6672                 {
6673                   stmts = NULL;
6674                   elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
6675                                       elt, step_expr);
6676                   if (stmts)
6677                     {
6678                       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6679                       gcc_assert (!new_bb);
6680                     }
6681                 }
6682               if (! CONSTANT_CLASS_P (elt))
6683                 constant_p = false;
6684               elts[eltn] = elt;
6685             }
6686           if (constant_p)
6687             new_vec = build_vector (vectype, elts);
6688           else
6689             {
6690               vec<constructor_elt, va_gc> *v;
6691               vec_alloc (v, nunits);
6692               for (i = 0; i < nunits; ++i)
6693                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
6694               new_vec = build_constructor (vectype, v);
6695             }
6696           vec_init = vect_init_vector (phi, new_vec, vectype, NULL);
6697
6698           /* Create the induction-phi that defines the induction-operand.  */
6699           vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6700           induction_phi = create_phi_node (vec_dest, iv_loop->header);
6701           set_vinfo_for_stmt (induction_phi,
6702                               new_stmt_vec_info (induction_phi, loop_vinfo));
6703           induc_def = PHI_RESULT (induction_phi);
6704
6705           /* Create the iv update inside the loop  */
6706           vec_def = make_ssa_name (vec_dest);
6707           new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6708           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6709           set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
6710
6711           /* Set the arguments of the phi node:  */
6712           add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6713           add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6714                        UNKNOWN_LOCATION);
6715
6716           SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
6717         }
6718
6719       /* Re-use IVs when we can.  */
6720       if (ivn < nvects)
6721         {
6722           unsigned vfp
6723             = least_common_multiple (group_size, nunits) / group_size;
6724           /* Generate [VF'*S, VF'*S, ... ].  */
6725           if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6726             {
6727               expr = build_int_cst (integer_type_node, vfp);
6728               expr = fold_convert (TREE_TYPE (step_expr), expr);
6729             }
6730           else
6731             expr = build_int_cst (TREE_TYPE (step_expr), vfp);
6732           new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6733                                   expr, step_expr);
6734           if (! CONSTANT_CLASS_P (new_name))
6735             new_name = vect_init_vector (phi, new_name,
6736                                          TREE_TYPE (step_expr), NULL);
6737           new_vec = build_vector_from_val (vectype, new_name);
6738           vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6739           for (; ivn < nvects; ++ivn)
6740             {
6741               gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
6742               tree def;
6743               if (gimple_code (iv) == GIMPLE_PHI)
6744                 def = gimple_phi_result (iv);
6745               else
6746                 def = gimple_assign_lhs (iv);
6747               new_stmt = gimple_build_assign (make_ssa_name (vectype),
6748                                               PLUS_EXPR,
6749                                               def, vec_step);
6750               if (gimple_code (iv) == GIMPLE_PHI)
6751                 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6752               else
6753                 {
6754                   gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
6755                   gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING);
6756                 }
6757               set_vinfo_for_stmt (new_stmt,
6758                                   new_stmt_vec_info (new_stmt, loop_vinfo));
6759               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6760             }
6761         }
6762
6763       return true;
6764     }
6765
6766   /* Create the vector that holds the initial_value of the induction.  */
6767   if (nested_in_vect_loop)
6768     {
6769       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
6770          been created during vectorization of previous stmts.  We obtain it
6771          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
6772       vec_init = vect_get_vec_def_for_operand (init_expr, phi);
6773       /* If the initial value is not of proper type, convert it.  */
6774       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
6775         {
6776           new_stmt
6777             = gimple_build_assign (vect_get_new_ssa_name (vectype,
6778                                                           vect_simple_var,
6779                                                           "vec_iv_"),
6780                                    VIEW_CONVERT_EXPR,
6781                                    build1 (VIEW_CONVERT_EXPR, vectype,
6782                                            vec_init));
6783           vec_init = gimple_assign_lhs (new_stmt);
6784           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
6785                                                  new_stmt);
6786           gcc_assert (!new_bb);
6787           set_vinfo_for_stmt (new_stmt,
6788                               new_stmt_vec_info (new_stmt, loop_vinfo));
6789         }
6790     }
6791   else
6792     {
6793       vec<constructor_elt, va_gc> *v;
6794
6795       /* iv_loop is the loop to be vectorized. Create:
6796          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
6797       stmts = NULL;
6798       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6799
6800       vec_alloc (v, nunits);
6801       bool constant_p = is_gimple_min_invariant (new_name);
6802       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
6803       for (i = 1; i < nunits; i++)
6804         {
6805           /* Create: new_name_i = new_name + step_expr  */
6806           new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
6807                                    new_name, step_expr);
6808           if (!is_gimple_min_invariant (new_name))
6809             constant_p = false;
6810           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
6811         }
6812       if (stmts)
6813         {
6814           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6815           gcc_assert (!new_bb);
6816         }
6817
6818       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
6819       if (constant_p)
6820         new_vec = build_vector_from_ctor (vectype, v);
6821       else
6822         new_vec = build_constructor (vectype, v);
6823       vec_init = vect_init_vector (phi, new_vec, vectype, NULL);
6824     }
6825
6826
6827   /* Create the vector that holds the step of the induction.  */
6828   if (nested_in_vect_loop)
6829     /* iv_loop is nested in the loop to be vectorized. Generate:
6830        vec_step = [S, S, S, S]  */
6831     new_name = step_expr;
6832   else
6833     {
6834       /* iv_loop is the loop to be vectorized. Generate:
6835           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
6836       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6837         {
6838           expr = build_int_cst (integer_type_node, vf);
6839           expr = fold_convert (TREE_TYPE (step_expr), expr);
6840         }
6841       else
6842         expr = build_int_cst (TREE_TYPE (step_expr), vf);
6843       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6844                               expr, step_expr);
6845       if (TREE_CODE (step_expr) == SSA_NAME)
6846         new_name = vect_init_vector (phi, new_name,
6847                                      TREE_TYPE (step_expr), NULL);
6848     }
6849
6850   t = unshare_expr (new_name);
6851   gcc_assert (CONSTANT_CLASS_P (new_name)
6852               || TREE_CODE (new_name) == SSA_NAME);
6853   new_vec = build_vector_from_val (vectype, t);
6854   vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6855
6856
6857   /* Create the following def-use cycle:
6858      loop prolog:
6859          vec_init = ...
6860          vec_step = ...
6861      loop:
6862          vec_iv = PHI <vec_init, vec_loop>
6863          ...
6864          STMT
6865          ...
6866          vec_loop = vec_iv + vec_step;  */
6867
6868   /* Create the induction-phi that defines the induction-operand.  */
6869   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6870   induction_phi = create_phi_node (vec_dest, iv_loop->header);
6871   set_vinfo_for_stmt (induction_phi,
6872                       new_stmt_vec_info (induction_phi, loop_vinfo));
6873   induc_def = PHI_RESULT (induction_phi);
6874
6875   /* Create the iv update inside the loop  */
6876   vec_def = make_ssa_name (vec_dest);
6877   new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6878   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6879   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
6880
6881   /* Set the arguments of the phi node:  */
6882   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6883   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6884                UNKNOWN_LOCATION);
6885
6886   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi;
6887
6888   /* In case that vectorization factor (VF) is bigger than the number
6889      of elements that we can fit in a vectype (nunits), we have to generate
6890      more than one vector stmt - i.e - we need to "unroll" the
6891      vector stmt by a factor VF/nunits.  For more details see documentation
6892      in vectorizable_operation.  */
6893
6894   if (ncopies > 1)
6895     {
6896       stmt_vec_info prev_stmt_vinfo;
6897       /* FORNOW. This restriction should be relaxed.  */
6898       gcc_assert (!nested_in_vect_loop);
6899
6900       /* Create the vector that holds the step of the induction.  */
6901       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6902         {
6903           expr = build_int_cst (integer_type_node, nunits);
6904           expr = fold_convert (TREE_TYPE (step_expr), expr);
6905         }
6906       else
6907         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
6908       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6909                               expr, step_expr);
6910       if (TREE_CODE (step_expr) == SSA_NAME)
6911         new_name = vect_init_vector (phi, new_name,
6912                                      TREE_TYPE (step_expr), NULL);
6913       t = unshare_expr (new_name);
6914       gcc_assert (CONSTANT_CLASS_P (new_name)
6915                   || TREE_CODE (new_name) == SSA_NAME);
6916       new_vec = build_vector_from_val (vectype, t);
6917       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6918
6919       vec_def = induc_def;
6920       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
6921       for (i = 1; i < ncopies; i++)
6922         {
6923           /* vec_i = vec_prev + vec_step  */
6924           new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
6925                                           vec_def, vec_step);
6926           vec_def = make_ssa_name (vec_dest, new_stmt);
6927           gimple_assign_set_lhs (new_stmt, vec_def);
6928
6929           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6930           set_vinfo_for_stmt (new_stmt,
6931                               new_stmt_vec_info (new_stmt, loop_vinfo));
6932           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
6933           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
6934         }
6935     }
6936
6937   if (nested_in_vect_loop)
6938     {
6939       /* Find the loop-closed exit-phi of the induction, and record
6940          the final vector of induction results:  */
6941       exit_phi = NULL;
6942       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6943         {
6944           gimple *use_stmt = USE_STMT (use_p);
6945           if (is_gimple_debug (use_stmt))
6946             continue;
6947
6948           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
6949             {
6950               exit_phi = use_stmt;
6951               break;
6952             }
6953         }
6954       if (exit_phi)
6955         {
6956           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
6957           /* FORNOW. Currently not supporting the case that an inner-loop induction
6958              is not used in the outer-loop (i.e. only outside the outer-loop).  */
6959           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
6960                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
6961
6962           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
6963           if (dump_enabled_p ())
6964             {
6965               dump_printf_loc (MSG_NOTE, vect_location,
6966                                "vector of inductions after inner-loop:");
6967               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
6968             }
6969         }
6970     }
6971
6972
6973   if (dump_enabled_p ())
6974     {
6975       dump_printf_loc (MSG_NOTE, vect_location,
6976                        "transform induction: created def-use cycle: ");
6977       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
6978       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6979                         SSA_NAME_DEF_STMT (vec_def), 0);
6980     }
6981
6982   return true;
6983 }
6984
6985 /* Function vectorizable_live_operation.
6986
6987    STMT computes a value that is used outside the loop.  Check if
6988    it can be supported.  */
6989
6990 bool
6991 vectorizable_live_operation (gimple *stmt,
6992                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6993                              slp_tree slp_node, int slp_index,
6994                              gimple **vec_stmt)
6995 {
6996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6997   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6998   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6999   imm_use_iterator imm_iter;
7000   tree lhs, lhs_type, bitsize, vec_bitsize;
7001   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7002   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7003   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7004   gimple *use_stmt;
7005   auto_vec<tree> vec_oprnds;
7006
7007   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
7008
7009   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
7010     return false;
7011
7012   /* FORNOW.  CHECKME.  */
7013   if (nested_in_vect_loop_p (loop, stmt))
7014     return false;
7015
7016   /* If STMT is not relevant and it is a simple assignment and its inputs are
7017      invariant then it can remain in place, unvectorized.  The original last
7018      scalar value that it computes will be used.  */
7019   if (!STMT_VINFO_RELEVANT_P (stmt_info))
7020     {
7021       gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo));
7022       if (dump_enabled_p ())
7023         dump_printf_loc (MSG_NOTE, vect_location,
7024                          "statement is simple and uses invariant.  Leaving in "
7025                          "place.\n");
7026       return true;
7027     }
7028
7029   if (!vec_stmt)
7030     /* No transformation required.  */
7031     return true;
7032
7033   /* If stmt has a related stmt, then use that for getting the lhs.  */
7034   if (is_pattern_stmt_p (stmt_info))
7035     stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7036
7037   lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
7038         : gimple_get_lhs (stmt);
7039   lhs_type = TREE_TYPE (lhs);
7040
7041   bitsize = TYPE_SIZE (TREE_TYPE (vectype));
7042   vec_bitsize = TYPE_SIZE (vectype);
7043
7044   /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
7045   tree vec_lhs, bitstart;
7046   if (slp_node)
7047     {
7048       gcc_assert (slp_index >= 0);
7049
7050       int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
7051       int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7052
7053       /* Get the last occurrence of the scalar index from the concatenation of
7054          all the slp vectors. Calculate which slp vector it is and the index
7055          within.  */
7056       int pos = (num_vec * nunits) - num_scalar + slp_index;
7057       int vec_entry = pos / nunits;
7058       int vec_index = pos % nunits;
7059
7060       /* Get the correct slp vectorized stmt.  */
7061       vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
7062
7063       /* Get entry to use.  */
7064       bitstart = build_int_cst (unsigned_type_node, vec_index);
7065       bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
7066     }
7067   else
7068     {
7069       enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
7070       vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt);
7071
7072       /* For multiple copies, get the last copy.  */
7073       for (int i = 1; i < ncopies; ++i)
7074         vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type,
7075                                                   vec_lhs);
7076
7077       /* Get the last lane in the vector.  */
7078       bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
7079     }
7080
7081   /* Create a new vectorized stmt for the uses of STMT and insert outside the
7082      loop.  */
7083   gimple_seq stmts = NULL;
7084   tree bftype = TREE_TYPE (vectype);
7085   if (VECTOR_BOOLEAN_TYPE_P (vectype))
7086     bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
7087   tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
7088   new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts,
7089                                    true, NULL_TREE);
7090   if (stmts)
7091     gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
7092
7093   /* Replace use of lhs with newly computed result.  If the use stmt is a
7094      single arg PHI, just replace all uses of PHI result.  It's necessary
7095      because lcssa PHI defining lhs may be before newly inserted stmt.  */
7096   use_operand_p use_p;
7097   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
7098     if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
7099         && !is_gimple_debug (use_stmt))
7100     {
7101       if (gimple_code (use_stmt) == GIMPLE_PHI
7102           && gimple_phi_num_args (use_stmt) == 1)
7103         {
7104           replace_uses_by (gimple_phi_result (use_stmt), new_tree);
7105         }
7106       else
7107         {
7108           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7109             SET_USE (use_p, new_tree);
7110         }
7111       update_stmt (use_stmt);
7112     }
7113
7114   return true;
7115 }
7116
7117 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
7118
7119 static void
7120 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
7121 {
7122   ssa_op_iter op_iter;
7123   imm_use_iterator imm_iter;
7124   def_operand_p def_p;
7125   gimple *ustmt;
7126
7127   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
7128     {
7129       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
7130         {
7131           basic_block bb;
7132
7133           if (!is_gimple_debug (ustmt))
7134             continue;
7135
7136           bb = gimple_bb (ustmt);
7137
7138           if (!flow_bb_inside_loop_p (loop, bb))
7139             {
7140               if (gimple_debug_bind_p (ustmt))
7141                 {
7142                   if (dump_enabled_p ())
7143                     dump_printf_loc (MSG_NOTE, vect_location,
7144                                      "killing debug use\n");
7145
7146                   gimple_debug_bind_reset_value (ustmt);
7147                   update_stmt (ustmt);
7148                 }
7149               else
7150                 gcc_unreachable ();
7151             }
7152         }
7153     }
7154 }
7155
7156 /* Given loop represented by LOOP_VINFO, return true if computation of
7157    LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false
7158    otherwise.  */
7159
7160 static bool
7161 loop_niters_no_overflow (loop_vec_info loop_vinfo)
7162 {
7163   /* Constant case.  */
7164   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
7165     {
7166       tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo);
7167       tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
7168
7169       gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST);
7170       gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST);
7171       if (wi::to_widest (cst_nitersm1) < wi::to_widest (cst_niters))
7172         return true;
7173     }
7174
7175   widest_int max;
7176   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7177   /* Check the upper bound of loop niters.  */
7178   if (get_max_loop_iterations (loop, &max))
7179     {
7180       tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
7181       signop sgn = TYPE_SIGN (type);
7182       widest_int type_max = widest_int::from (wi::max_value (type), sgn);
7183       if (max < type_max)
7184         return true;
7185     }
7186   return false;
7187 }
7188
7189 /* Scale profiling counters by estimation for LOOP which is vectorized
7190    by factor VF.  */
7191
7192 static void
7193 scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
7194 {
7195   edge preheader = loop_preheader_edge (loop);
7196   /* Reduce loop iterations by the vectorization factor.  */
7197   gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
7198   profile_count freq_h = loop->header->count, freq_e = preheader->count;
7199
7200   /* Use frequency only if counts are zero.  */
7201   if (!(freq_h > 0) && !(freq_e > 0))
7202     {
7203       freq_h = profile_count::from_gcov_type (loop->header->frequency);
7204       freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader));
7205     }
7206   if (freq_h > 0)
7207     {
7208       profile_probability p;
7209
7210       /* Avoid dropping loop body profile counter to 0 because of zero count
7211          in loop's preheader.  */
7212       if (!(freq_e > profile_count::from_gcov_type (1)))
7213        freq_e = profile_count::from_gcov_type (1);
7214       p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h);
7215       scale_loop_frequencies (loop, p);
7216     }
7217
7218   basic_block exit_bb = single_pred (loop->latch);
7219   edge exit_e = single_exit (loop);
7220   exit_e->count = loop_preheader_edge (loop)->count;
7221   exit_e->probability = profile_probability::always ()
7222                                  .apply_scale (1, new_est_niter + 1);
7223
7224   edge exit_l = single_pred_edge (loop->latch);
7225   profile_probability prob = exit_l->probability;
7226   exit_l->probability = exit_e->probability.invert ();
7227   exit_l->count = exit_bb->count - exit_e->count;
7228   if (prob.initialized_p () && exit_l->probability.initialized_p ())
7229     scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
7230 }
7231
7232 /* Function vect_transform_loop.
7233
7234    The analysis phase has determined that the loop is vectorizable.
7235    Vectorize the loop - create vectorized stmts to replace the scalar
7236    stmts in the loop, and update the loop exit condition.
7237    Returns scalar epilogue loop if any.  */
7238
7239 struct loop *
7240 vect_transform_loop (loop_vec_info loop_vinfo)
7241 {
7242   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7243   struct loop *epilogue = NULL;
7244   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7245   int nbbs = loop->num_nodes;
7246   int i;
7247   tree niters_vector = NULL;
7248   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7249   bool grouped_store;
7250   bool slp_scheduled = false;
7251   gimple *stmt, *pattern_stmt;
7252   gimple_seq pattern_def_seq = NULL;
7253   gimple_stmt_iterator pattern_def_si = gsi_none ();
7254   bool transform_pattern_stmt = false;
7255   bool check_profitability = false;
7256   int th;
7257
7258   if (dump_enabled_p ())
7259     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
7260
7261   /* Use the more conservative vectorization threshold.  If the number
7262      of iterations is constant assume the cost check has been performed
7263      by our caller.  If the threshold makes all loops profitable that
7264      run at least the vectorization factor number of times checking
7265      is pointless, too.  */
7266   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
7267   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo)
7268       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
7269     {
7270       if (dump_enabled_p ())
7271         dump_printf_loc (MSG_NOTE, vect_location,
7272                          "Profitability threshold is %d loop iterations.\n",
7273                          th);
7274       check_profitability = true;
7275     }
7276
7277   /* Make sure there exists a single-predecessor exit bb.  Do this before
7278      versioning.   */
7279   edge e = single_exit (loop);
7280   if (! single_pred_p (e->dest))
7281     {
7282       split_loop_exit_edge (e);
7283       if (dump_enabled_p ())
7284         dump_printf (MSG_NOTE, "split exit edge\n");
7285     }
7286
7287   /* Version the loop first, if required, so the profitability check
7288      comes first.  */
7289
7290   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
7291     {
7292       vect_loop_versioning (loop_vinfo, th, check_profitability);
7293       check_profitability = false;
7294     }
7295
7296   /* Make sure there exists a single-predecessor exit bb also on the
7297      scalar loop copy.  Do this after versioning but before peeling
7298      so CFG structure is fine for both scalar and if-converted loop
7299      to make slpeel_duplicate_current_defs_from_edges face matched
7300      loop closed PHI nodes on the exit.  */
7301   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
7302     {
7303       e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
7304       if (! single_pred_p (e->dest))
7305         {
7306           split_loop_exit_edge (e);
7307           if (dump_enabled_p ())
7308             dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
7309         }
7310     }
7311
7312   tree niters = vect_build_loop_niters (loop_vinfo);
7313   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
7314   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
7315   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
7316   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
7317                               check_profitability, niters_no_overflow);
7318   if (niters_vector == NULL_TREE)
7319     {
7320       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
7321         niters_vector
7322           = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7323                            LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
7324       else
7325         vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
7326                                      niters_no_overflow);
7327     }
7328
7329   /* 1) Make sure the loop header has exactly two entries
7330      2) Make sure we have a preheader basic block.  */
7331
7332   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7333
7334   split_edge (loop_preheader_edge (loop));
7335
7336   /* FORNOW: the vectorizer supports only loops which body consist
7337      of one basic block (header + empty latch). When the vectorizer will
7338      support more involved loop forms, the order by which the BBs are
7339      traversed need to be reconsidered.  */
7340
7341   for (i = 0; i < nbbs; i++)
7342     {
7343       basic_block bb = bbs[i];
7344       stmt_vec_info stmt_info;
7345
7346       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
7347            gsi_next (&si))
7348         {
7349           gphi *phi = si.phi ();
7350           if (dump_enabled_p ())
7351             {
7352               dump_printf_loc (MSG_NOTE, vect_location,
7353                                "------>vectorizing phi: ");
7354               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
7355             }
7356           stmt_info = vinfo_for_stmt (phi);
7357           if (!stmt_info)
7358             continue;
7359
7360           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7361             vect_loop_kill_debug_uses (loop, phi);
7362
7363           if (!STMT_VINFO_RELEVANT_P (stmt_info)
7364               && !STMT_VINFO_LIVE_P (stmt_info))
7365             continue;
7366
7367           if (STMT_VINFO_VECTYPE (stmt_info)
7368               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7369                   != (unsigned HOST_WIDE_INT) vf)
7370               && dump_enabled_p ())
7371             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7372
7373           if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
7374                || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
7375                || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
7376               && ! PURE_SLP_STMT (stmt_info))
7377             {
7378               if (dump_enabled_p ())
7379                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
7380               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
7381             }
7382         }
7383
7384       pattern_stmt = NULL;
7385       for (gimple_stmt_iterator si = gsi_start_bb (bb);
7386            !gsi_end_p (si) || transform_pattern_stmt;)
7387         {
7388           bool is_store;
7389
7390           if (transform_pattern_stmt)
7391             stmt = pattern_stmt;
7392           else
7393             {
7394               stmt = gsi_stmt (si);
7395               /* During vectorization remove existing clobber stmts.  */
7396               if (gimple_clobber_p (stmt))
7397                 {
7398                   unlink_stmt_vdef (stmt);
7399                   gsi_remove (&si, true);
7400                   release_defs (stmt);
7401                   continue;
7402                 }
7403             }
7404
7405           if (dump_enabled_p ())
7406             {
7407               dump_printf_loc (MSG_NOTE, vect_location,
7408                                "------>vectorizing statement: ");
7409               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7410             }
7411
7412           stmt_info = vinfo_for_stmt (stmt);
7413
7414           /* vector stmts created in the outer-loop during vectorization of
7415              stmts in an inner-loop may not have a stmt_info, and do not
7416              need to be vectorized.  */
7417           if (!stmt_info)
7418             {
7419               gsi_next (&si);
7420               continue;
7421             }
7422
7423           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7424             vect_loop_kill_debug_uses (loop, stmt);
7425
7426           if (!STMT_VINFO_RELEVANT_P (stmt_info)
7427               && !STMT_VINFO_LIVE_P (stmt_info))
7428             {
7429               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7430                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7431                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7432                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7433                 {
7434                   stmt = pattern_stmt;
7435                   stmt_info = vinfo_for_stmt (stmt);
7436                 }
7437               else
7438                 {
7439                   gsi_next (&si);
7440                   continue;
7441                 }
7442             }
7443           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7444                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7445                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7446                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7447             transform_pattern_stmt = true;
7448
7449           /* If pattern statement has def stmts, vectorize them too.  */
7450           if (is_pattern_stmt_p (stmt_info))
7451             {
7452               if (pattern_def_seq == NULL)
7453                 {
7454                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
7455                   pattern_def_si = gsi_start (pattern_def_seq);
7456                 }
7457               else if (!gsi_end_p (pattern_def_si))
7458                 gsi_next (&pattern_def_si);
7459               if (pattern_def_seq != NULL)
7460                 {
7461                   gimple *pattern_def_stmt = NULL;
7462                   stmt_vec_info pattern_def_stmt_info = NULL;
7463
7464                   while (!gsi_end_p (pattern_def_si))
7465                     {
7466                       pattern_def_stmt = gsi_stmt (pattern_def_si);
7467                       pattern_def_stmt_info
7468                         = vinfo_for_stmt (pattern_def_stmt);
7469                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
7470                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
7471                         break;
7472                       gsi_next (&pattern_def_si);
7473                     }
7474
7475                   if (!gsi_end_p (pattern_def_si))
7476                     {
7477                       if (dump_enabled_p ())
7478                         {
7479                           dump_printf_loc (MSG_NOTE, vect_location,
7480                                            "==> vectorizing pattern def "
7481                                            "stmt: ");
7482                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
7483                                             pattern_def_stmt, 0);
7484                         }
7485
7486                       stmt = pattern_def_stmt;
7487                       stmt_info = pattern_def_stmt_info;
7488                     }
7489                   else
7490                     {
7491                       pattern_def_si = gsi_none ();
7492                       transform_pattern_stmt = false;
7493                     }
7494                 }
7495               else
7496                 transform_pattern_stmt = false;
7497             }
7498
7499           if (STMT_VINFO_VECTYPE (stmt_info))
7500             {
7501               unsigned int nunits
7502                 = (unsigned int)
7503                   TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7504               if (!STMT_SLP_TYPE (stmt_info)
7505                   && nunits != (unsigned int) vf
7506                   && dump_enabled_p ())
7507                   /* For SLP VF is set according to unrolling factor, and not
7508                      to vector size, hence for SLP this print is not valid.  */
7509                 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7510             }
7511
7512           /* SLP. Schedule all the SLP instances when the first SLP stmt is
7513              reached.  */
7514           if (STMT_SLP_TYPE (stmt_info))
7515             {
7516               if (!slp_scheduled)
7517                 {
7518                   slp_scheduled = true;
7519
7520                   if (dump_enabled_p ())
7521                     dump_printf_loc (MSG_NOTE, vect_location,
7522                                      "=== scheduling SLP instances ===\n");
7523
7524                   vect_schedule_slp (loop_vinfo);
7525                 }
7526
7527               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
7528               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
7529                 {
7530                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7531                     {
7532                       pattern_def_seq = NULL;
7533                       gsi_next (&si);
7534                     }
7535                   continue;
7536                 }
7537             }
7538
7539           /* -------- vectorize statement ------------ */
7540           if (dump_enabled_p ())
7541             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
7542
7543           grouped_store = false;
7544           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
7545           if (is_store)
7546             {
7547               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7548                 {
7549                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7550                      interleaving chain was completed - free all the stores in
7551                      the chain.  */
7552                   gsi_next (&si);
7553                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
7554                 }
7555               else
7556                 {
7557                   /* Free the attached stmt_vec_info and remove the stmt.  */
7558                   gimple *store = gsi_stmt (si);
7559                   free_stmt_vec_info (store);
7560                   unlink_stmt_vdef (store);
7561                   gsi_remove (&si, true);
7562                   release_defs (store);
7563                 }
7564
7565               /* Stores can only appear at the end of pattern statements.  */
7566               gcc_assert (!transform_pattern_stmt);
7567               pattern_def_seq = NULL;
7568             }
7569           else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7570             {
7571               pattern_def_seq = NULL;
7572               gsi_next (&si);
7573             }
7574         }                       /* stmts in BB */
7575     }                           /* BBs in loop */
7576
7577   slpeel_make_loop_iterate_ntimes (loop, niters_vector);
7578
7579   scale_profile_for_vect_loop (loop, vf);
7580
7581   /* The minimum number of iterations performed by the epilogue.  This
7582      is 1 when peeling for gaps because we always need a final scalar
7583      iteration.  */
7584   int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
7585   /* +1 to convert latch counts to loop iteration counts,
7586      -min_epilogue_iters to remove iterations that cannot be performed
7587        by the vector code.  */
7588   int bias = 1 - min_epilogue_iters;
7589   /* In these calculations the "- 1" converts loop iteration counts
7590      back to latch counts.  */
7591   if (loop->any_upper_bound)
7592     loop->nb_iterations_upper_bound
7593       = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1;
7594   if (loop->any_likely_upper_bound)
7595     loop->nb_iterations_likely_upper_bound
7596       = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1;
7597   if (loop->any_estimate)
7598     loop->nb_iterations_estimate
7599       = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1;
7600
7601   if (dump_enabled_p ())
7602     {
7603       if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7604         {
7605           dump_printf_loc (MSG_NOTE, vect_location,
7606                            "LOOP VECTORIZED\n");
7607           if (loop->inner)
7608             dump_printf_loc (MSG_NOTE, vect_location,
7609                              "OUTER LOOP VECTORIZED\n");
7610           dump_printf (MSG_NOTE, "\n");
7611         }
7612       else
7613         dump_printf_loc (MSG_NOTE, vect_location,
7614                          "LOOP EPILOGUE VECTORIZED (VS=%d)\n",
7615                          current_vector_size);
7616     }
7617
7618   /* Free SLP instances here because otherwise stmt reference counting
7619      won't work.  */
7620   slp_instance instance;
7621   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
7622     vect_free_slp_instance (instance);
7623   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
7624   /* Clear-up safelen field since its value is invalid after vectorization
7625      since vectorized loop can have loop-carried dependencies.  */
7626   loop->safelen = 0;
7627
7628   /* Don't vectorize epilogue for epilogue.  */
7629   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7630     epilogue = NULL;
7631
7632   if (epilogue)
7633     {
7634         unsigned int vector_sizes
7635           = targetm.vectorize.autovectorize_vector_sizes ();
7636         vector_sizes &= current_vector_size - 1;
7637
7638         if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
7639           epilogue = NULL;
7640         else if (!vector_sizes)
7641           epilogue = NULL;
7642         else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7643                  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
7644           {
7645             int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
7646             int ratio = current_vector_size / smallest_vec_size;
7647             int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
7648               - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
7649             eiters = eiters % vf;
7650
7651             epilogue->nb_iterations_upper_bound = eiters - 1;
7652
7653             if (eiters < vf / ratio)
7654               epilogue = NULL;
7655             }
7656     }
7657
7658   if (epilogue)
7659     {
7660       epilogue->force_vectorize = loop->force_vectorize;
7661       epilogue->safelen = loop->safelen;
7662       epilogue->dont_vectorize = false;
7663
7664       /* We may need to if-convert epilogue to vectorize it.  */
7665       if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
7666         tree_if_conversion (epilogue);
7667     }
7668
7669   return epilogue;
7670 }
7671
7672 /* Perform a simple optimization on LOOP: revert if-conversion for
7673    masked stores, i.e. if the mask of a store is all zeros, skip the
7674    store and, where possible, the statements producing the stored value.
7675    For example, for the loop
7676      for (i=0; i<n; i++)
7677        if (c[i])
7678         {
7679           p1[i] += 1;
7680           p2[i] = p3[i] +2;
7681         }
7682    this transformation will produce the following semi-hammock:
7683
7684    if (!(mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 }))
7685      {
7686        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
7687        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
7688        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
7689        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
7690        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
7691        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
7692      }
7693 */
7694
7695 void
7696 optimize_mask_stores (struct loop *loop)
7697 {
7698   basic_block *bbs = get_loop_body (loop);
7699   unsigned nbbs = loop->num_nodes;
7700   unsigned i;
7701   basic_block bb;
7702   struct loop *bb_loop;
7703   gimple_stmt_iterator gsi;
7704   gimple *stmt;
7705   auto_vec<gimple *> worklist;
7706
7707   vect_location = find_loop_location (loop);
7708   /* Collect every IFN_MASK_STORE call in the blocks of LOOP into WORKLIST.  */
7709   for (i = 0; i < nbbs; i++)
7710     {
7711       bb = bbs[i];
7712       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
7713            gsi_next (&gsi))
7714         {
7715           stmt = gsi_stmt (gsi);
7716           if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
7717             worklist.safe_push (stmt);
7718         }
7719     }
7720
7721   free (bbs);
7722   if (worklist.is_empty ())
7723     return;
7724
7725   /* Loop has masked stores; each iteration creates one STORE_BB.  */
7726   while (!worklist.is_empty ())
7727     {
7728       gimple *last, *last_store;
7729       edge e, efalse;
7730       tree mask;
7731       basic_block store_bb, join_bb;
7732       gimple_stmt_iterator gsi_to;
7733       tree vdef, new_vdef;
7734       gphi *phi;
7735       tree vectype;
7736       tree zero;
7737
7738       last = worklist.pop ();
7739       mask = gimple_call_arg (last, 2);
7740       bb = gimple_bb (last);
7741       /* Split BB at LAST and create STORE_BB (the "then" block) in the
7742          same loop as BB.  That loop can differ from LOOP when a two-level
7743          loop nest is vectorized and the masked store belongs to the inner
7744          one.  */
7745       e = split_block (bb, last);
7746       bb_loop = bb->loop_father;
7747       gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
7748       join_bb = e->dest;
7749       store_bb = create_empty_bb (bb);
7750       add_bb_to_loop (store_bb, bb_loop);
7751       e->flags = EDGE_TRUE_VALUE;
7752       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
7753       /* Mark the edge to STORE_BB as unlikely.  */
7754       efalse->probability = profile_probability::unlikely ();
7755       store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
7756       make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
7757       if (dom_info_available_p (CDI_DOMINATORS))
7758         set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
7759       if (dump_enabled_p ())
7760         dump_printf_loc (MSG_NOTE, vect_location,
7761                          "Create new block %d to sink mask stores.",
7762                          store_bb->index);
7763       /* End BB with the test MASK == 0; its false edge leads to STORE_BB.  */
7764       vectype = TREE_TYPE (mask);
7765       zero = build_zero_cst (vectype);
7766       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
7767       gsi = gsi_last_bb (bb);
7768       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7769       /* Create new PHI node for vdef of the last masked store:
7770          .MEM_2 = VDEF <.MEM_1>
7771          will be converted to
7772          .MEM_3 = VDEF <.MEM_1>
7773          and new PHI node will be created in join bb
7774          .MEM_2 = PHI <.MEM_1, .MEM_3>
7775          The argument for edge E is added after the stores are moved.  */
7776       vdef = gimple_vdef (last);
7777       new_vdef = make_ssa_name (gimple_vop (cfun), last);
7778       gimple_set_vdef (last, new_vdef);
7779       phi = create_phi_node (vdef, join_bb);
7780       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
7781
7782       /* Put all masked stores with the same mask to STORE_BB if possible.  */
7783       while (true)
7784         {
7785           gimple_stmt_iterator gsi_from;
7786           gimple *stmt1 = NULL;
7787
7788           /* Move masked store LAST to the start of STORE_BB.  */
7789           last_store = last;
7790           gsi = gsi_for_stmt (last);
7791           gsi_from = gsi;
7792           /* Shift GSI to the previous stmt for further traversal.  */
7793           gsi_prev (&gsi);
7794           gsi_to = gsi_start_bb (store_bb);
7795           gsi_move_before (&gsi_from, &gsi_to);
7796           /* Setup GSI_TO to the non-empty block start.  */
7797           gsi_to = gsi_start_bb (store_bb);
7798           if (dump_enabled_p ())
7799             {
7800               dump_printf_loc (MSG_NOTE, vect_location,
7801                                "Move stmt to created bb\n");
7802               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
7803             }
7804           /* Walk backwards from the store, moving its value producers.  */
7805           while (!gsi_end_p (gsi))
7806             {
7807               tree lhs;
7808               imm_use_iterator imm_iter;
7809               use_operand_p use_p;
7810               bool res;
7811
7812               /* Skip debug statements.  */
7813               if (is_gimple_debug (gsi_stmt (gsi)))
7814                 {
7815                   gsi_prev (&gsi);
7816                   continue;
7817                 }
7818               stmt1 = gsi_stmt (gsi);
7819               /* Do not consider statements writing to memory or having
7820                  volatile operand.  */
7821               if (gimple_vdef (stmt1)
7822                   || gimple_has_volatile_ops (stmt1))
7823                 break;
7824               gsi_from = gsi;
7825               gsi_prev (&gsi);
7826               lhs = gimple_get_lhs (stmt1);
7827               if (!lhs)
7828                 break;
7829
7830               /* LHS of vectorized stmt must be SSA_NAME.  */
7831               if (TREE_CODE (lhs) != SSA_NAME)
7832                 break;
7833
7834               if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
7835                 {
7836                   /* Remove dead scalar statement.  */
7837                   if (has_zero_uses (lhs))
7838                     {
7839                       gsi_remove (&gsi_from, true);
7840                       continue;
7841                     }
7842                 }
7843
7844               /* Check that LHS does not have uses outside of STORE_BB.  */
7845               res = true;
7846               FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
7847                 {
7848                   gimple *use_stmt;
7849                   use_stmt = USE_STMT (use_p);
7850                   if (is_gimple_debug (use_stmt))
7851                     continue;
7852                   if (gimple_bb (use_stmt) != store_bb)
7853                     {
7854                       res = false;
7855                       break;
7856                     }
7857                 }
7858               if (!res)
7859                 break;
7860               /* STMT1 may use memory only with the same VUSE as LAST_STORE.  */
7861               if (gimple_vuse (stmt1)
7862                   && gimple_vuse (stmt1) != gimple_vuse (last_store))
7863                 break;
7864
7865               /* Can move STMT1 to STORE_BB.  */
7866               if (dump_enabled_p ())
7867                 {
7868                   dump_printf_loc (MSG_NOTE, vect_location,
7869                                    "Move stmt to created bb\n");
7870                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
7871                 }
7872               gsi_move_before (&gsi_from, &gsi_to);
7873               /* Shift GSI_TO for further insertion.  */
7874               gsi_prev (&gsi_to);
7875             }
7876           /* Also sink the next masked store if it has the same mask and is STMT1.  */
7877           if (worklist.is_empty ()
7878               || gimple_call_arg (worklist.last (), 2) != mask
7879               || worklist.last () != stmt1)
7880             break;
7881           last = worklist.pop ();
7882         }
7883       add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
7884     }
7885 }