gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "cfghooks.h"
  31 #include "tree-pass.h"
  32 #include "ssa.h"
  33 #include "optabs-tree.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "cfganal.h"
  38 #include "gimplify.h"
  39 #include "gimple-iterator.h"
  40 #include "gimplify-me.h"
  41 #include "tree-ssa-loop-ivopts.h"
  42 #include "tree-ssa-loop-manip.h"
  43 #include "tree-ssa-loop-niter.h"
  44 #include "tree-ssa-loop.h"
  45 #include "cfgloop.h"
  46 #include "params.h"
  47 #include "tree-scalar-evolution.h"
  48 #include "tree-vectorizer.h"
  49 #include "gimple-fold.h"
  50 #include "cgraph.h"
  51 #include "tree-cfg.h"
  52
  53 /* Loop Vectorization Pass.
  54
  55    This pass tries to vectorize loops.
  56
  57    For example, the vectorizer transforms the following simple loop:
  58
  59         short a[N]; short b[N]; short c[N]; int i;
  60
  61         for (i=0; i<N; i++){
  62           a[i] = b[i] + c[i];
  63         }
  64
  65    as if it was manually vectorized by rewriting the source code into:
  66
  67         typedef int __attribute__((mode(V8HI))) v8hi;
  68         short a[N];  short b[N]; short c[N];   int i;
  69         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  70         v8hi va, vb, vc;
  71
  72         for (i=0; i<N/8; i++){
  73           vb = pb[i];
  74           vc = pc[i];
  75           va = vb + vc;
  76           pa[i] = va;
  77         }
  78
  79         The main entry to this pass is vectorize_loops(), in which
  80    the vectorizer applies a set of analyses on a given set of loops,
  81    followed by the actual vectorization transformation for the loops that
  82    had successfully passed the analysis phase.
  83         Throughout this pass we make a distinction between two types of
  84    data: scalars (which are represented by SSA_NAMES), and memory references
  85    ("data-refs").  These two types of data require different handling both
  86    during analysis and transformation. The types of data-refs that the
  87    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  88    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  89    accesses are required to have a simple (consecutive) access pattern.
  90
  91    Analysis phase:
  92    ===============
  93         The driver for the analysis phase is vect_analyze_loop().
  94    It applies a set of analyses, some of which rely on the scalar evolution
  95    analyzer (scev) developed by Sebastian Pop.
  96
  97         During the analysis phase the vectorizer records some information
  98    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  99    loop, as well as general information about the loop as a whole, which is
 100    recorded in a "loop_vec_info" struct attached to each loop.
 101
 102    Transformation phase:
 103    =====================
 104         The loop transformation phase scans all the stmts in the loop, and
 105    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 106    the loop that needs to be vectorized.  It inserts the vector code sequence
 107    just before the scalar stmt S, and records a pointer to the vector code
 108    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 109    attached to S).  This pointer will be used for the vectorization of following
 110    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 111    otherwise, we rely on dead code elimination for removing it.
 112
 113         For example, say stmt S1 was vectorized into stmt VS1:
 114
 115    VS1: vb = px[i];
 116    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 117    S2:  a = b;
 118
 119    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 120    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 121    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 122    resulting sequence would be:
 123
 124    VS1: vb = px[i];
 125    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 126    VS2: va = vb;
 127    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 128
 129         Operands that are not SSA_NAMEs, are data-refs that appear in
 130    load/store operations (like 'x[i]' in S1), and are handled differently.
 131
 132    Target modeling:
 133    =================
 134         Currently the only target specific information that is used is the
 135    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 136    Targets that can support different sizes of vectors, for now will need
 137    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 138    flexibility will be added in the future.
 139
 140         Since we only vectorize operations which vector form can be
 141    expressed using existing tree codes, to verify that an operation is
 142    supported, the vectorizer checks the relevant optab at the relevant
 143    machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
 144    the value found is CODE_FOR_nothing, then there's no target support, and
 145    we can't vectorize the stmt.
 146
 147    For additional information on this project see:
 148    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 149 */
 150
 151 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 152
 153 /* Function vect_determine_vectorization_factor
 154
 155    Determine the vectorization factor (VF).  VF is the number of data elements
 156    that are operated upon in parallel in a single iteration of the vectorized
 157    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 158    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 159    elements can fit in a single vector register.
 160
 161    We currently support vectorization of loops in which all types operated upon
 162    are of the same size.  Therefore this function currently sets VF according to
 163    the size of the types operated upon, and fails if there are multiple sizes
 164    in the loop.
 165
 166    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 167    original loop:
 168         for (i=0; i<N; i++){
 169           a[i] = b[i] + c[i];
 170         }
 171
 172    vectorized loop:
 173         for (i=0; i<N; i+=VF){
 174           a[i:VF] = b[i:VF] + c[i:VF];
 175         }
 176 */
 177
 178 static bool
 179 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 180 {
 181   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 182   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 183   unsigned nbbs = loop->num_nodes;
 184   unsigned int vectorization_factor = 0;
 185   tree scalar_type;
 186   gphi *phi;
 187   tree vectype;
 188   unsigned int nunits;
 189   stmt_vec_info stmt_info;
 190   unsigned i;
 191   HOST_WIDE_INT dummy;
 192   gimple *stmt, *pattern_stmt = NULL;
 193   gimple_seq pattern_def_seq = NULL;
 194   gimple_stmt_iterator pattern_def_si = gsi_none ();
 195   bool analyze_pattern_stmt = false;
 196   bool bool_result;
 197   auto_vec<stmt_vec_info> mask_producers;
 198
 199   if (dump_enabled_p ())
 200     dump_printf_loc (MSG_NOTE, vect_location,
 201                      "=== vect_determine_vectorization_factor ===\n");
 202
 203   for (i = 0; i < nbbs; i++)
 204     {
 205       basic_block bb = bbs[i];
 206
 207       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
 208            gsi_next (&si))
 209         {
 210           phi = si.phi ();
 211           stmt_info = vinfo_for_stmt (phi);
 212           if (dump_enabled_p ())
 213             {
 214               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 215               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 216             }
 217
 218           gcc_assert (stmt_info);
 219
 220           if (STMT_VINFO_RELEVANT_P (stmt_info)
 221               || STMT_VINFO_LIVE_P (stmt_info))
 222             {
 223               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 224               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 225
 226               if (dump_enabled_p ())
 227                 {
 228                   dump_printf_loc (MSG_NOTE, vect_location,
 229                                    "get vectype for scalar type:  ");
 230                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 231                   dump_printf (MSG_NOTE, "\n");
 232                 }
 233
 234               vectype = get_vectype_for_scalar_type (scalar_type);
 235               if (!vectype)
 236                 {
 237                   if (dump_enabled_p ())
 238                     {
 239                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 240                                        "not vectorized: unsupported "
 241                                        "data-type ");
 242                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 243                                          scalar_type);
 244                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 245                     }
 246                   return false;
 247                 }
 248               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 249
 250               if (dump_enabled_p ())
 251                 {
 252                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 253                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 254                   dump_printf (MSG_NOTE, "\n");
 255                 }
 256
 257               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 258               if (dump_enabled_p ())
 259                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
 260                                  nunits);
 261
 262               if (!vectorization_factor
 263                   || (nunits > vectorization_factor))
 264                 vectorization_factor = nunits;
 265             }
 266         }
 267
 268       for (gimple_stmt_iterator si = gsi_start_bb (bb);
 269            !gsi_end_p (si) || analyze_pattern_stmt;)
 270         {
 271           tree vf_vectype;
 272
 273           if (analyze_pattern_stmt)
 274             stmt = pattern_stmt;
 275           else
 276             stmt = gsi_stmt (si);
 277
 278           stmt_info = vinfo_for_stmt (stmt);
 279
 280           if (dump_enabled_p ())
 281             {
 282               dump_printf_loc (MSG_NOTE, vect_location,
 283                                "==> examining statement: ");
 284               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 285             }
 286
 287           gcc_assert (stmt_info);
 288
 289           /* Skip stmts which do not need to be vectorized.  */
 290           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
 291                && !STMT_VINFO_LIVE_P (stmt_info))
 292               || gimple_clobber_p (stmt))
 293             {
 294               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 295                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 296                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 297                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 298                 {
 299                   stmt = pattern_stmt;
 300                   stmt_info = vinfo_for_stmt (pattern_stmt);
 301                   if (dump_enabled_p ())
 302                     {
 303                       dump_printf_loc (MSG_NOTE, vect_location,
 304                                        "==> examining pattern statement: ");
 305                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 306                     }
 307                 }
 308               else
 309                 {
 310                   if (dump_enabled_p ())
 311                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
 312                   gsi_next (&si);
 313                   continue;
 314                 }
 315             }
 316           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 317                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 318                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 319                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 320             analyze_pattern_stmt = true;
 321
 322           /* If a pattern statement has def stmts, analyze them too.  */
 323           if (is_pattern_stmt_p (stmt_info))
 324             {
 325               if (pattern_def_seq == NULL)
 326                 {
 327                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 328                   pattern_def_si = gsi_start (pattern_def_seq);
 329                 }
 330               else if (!gsi_end_p (pattern_def_si))
 331                 gsi_next (&pattern_def_si);
 332               if (pattern_def_seq != NULL)
 333                 {
 334                   gimple *pattern_def_stmt = NULL;
 335                   stmt_vec_info pattern_def_stmt_info = NULL;
 336
 337                   while (!gsi_end_p (pattern_def_si))
 338                     {
 339                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 340                       pattern_def_stmt_info
 341                         = vinfo_for_stmt (pattern_def_stmt);
 342                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 343                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 344                         break;
 345                       gsi_next (&pattern_def_si);
 346                     }
 347
 348                   if (!gsi_end_p (pattern_def_si))
 349                     {
 350                       if (dump_enabled_p ())
 351                         {
 352                           dump_printf_loc (MSG_NOTE, vect_location,
 353                                            "==> examining pattern def stmt: ");
 354                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 355                                             pattern_def_stmt, 0);
 356                         }
 357
 358                       stmt = pattern_def_stmt;
 359                       stmt_info = pattern_def_stmt_info;
 360                     }
 361                   else
 362                     {
 363                       pattern_def_si = gsi_none ();
 364                       analyze_pattern_stmt = false;
 365                     }
 366                 }
 367               else
 368                 analyze_pattern_stmt = false;
 369             }
 370
 371           if (gimple_get_lhs (stmt) == NULL_TREE
 372               /* MASK_STORE has no lhs, but is ok.  */
 373               && (!is_gimple_call (stmt)
 374                   || !gimple_call_internal_p (stmt)
 375                   || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
 376             {
 377               if (is_gimple_call (stmt))
 378                 {
 379                   /* Ignore calls with no lhs.  These must be calls to
 380                      #pragma omp simd functions, and what vectorization factor
 381                      it really needs can't be determined until
 382                      vectorizable_simd_clone_call.  */
 383                   if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 384                     {
 385                       pattern_def_seq = NULL;
 386                       gsi_next (&si);
 387                     }
 388                   continue;
 389                 }
 390               if (dump_enabled_p ())
 391                 {
 392                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 393                                    "not vectorized: irregular stmt.");
 394                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 395                                     0);
 396                 }
 397               return false;
 398             }
 399
 400           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 401             {
 402               if (dump_enabled_p ())
 403                 {
 404                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 405                                    "not vectorized: vector stmt in loop:");
 406                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 407                 }
 408               return false;
 409             }
 410
 411           bool_result = false;
 412
 413           if (STMT_VINFO_VECTYPE (stmt_info))
 414             {
 415               /* The only case when a vectype had been already set is for stmts
 416                  that contain a dataref, or for "pattern-stmts" (stmts
 417                  generated by the vectorizer to represent/replace a certain
 418                  idiom).  */
 419               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 420                           || is_pattern_stmt_p (stmt_info)
 421                           || !gsi_end_p (pattern_def_si));
 422               vectype = STMT_VINFO_VECTYPE (stmt_info);
 423             }
 424           else
 425             {
 426               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 427               if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
 428                 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
 429               else
 430                 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 431
 432               /* Bool ops don't participate in vectorization factor
 433                  computation.  For comparison use compared types to
 434                  compute a factor.  */
 435               if (TREE_CODE (scalar_type) == BOOLEAN_TYPE
 436                   && is_gimple_assign (stmt)
 437                   && gimple_assign_rhs_code (stmt) != COND_EXPR)
 438                 {
 439                   if (STMT_VINFO_RELEVANT_P (stmt_info)
 440                       || STMT_VINFO_LIVE_P (stmt_info))
 441                     mask_producers.safe_push (stmt_info);
 442                   bool_result = true;
 443
 444                   if (gimple_code (stmt) == GIMPLE_ASSIGN
 445                       && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
 446                          == tcc_comparison
 447                       && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
 448                          != BOOLEAN_TYPE)
 449                     scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 450                   else
 451                     {
 452                       if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 453                         {
 454                           pattern_def_seq = NULL;
 455                           gsi_next (&si);
 456                         }
 457                       continue;
 458                     }
 459                 }
 460
 461               if (dump_enabled_p ())
 462                 {
 463                   dump_printf_loc (MSG_NOTE, vect_location,
 464                                    "get vectype for scalar type:  ");
 465                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 466                   dump_printf (MSG_NOTE, "\n");
 467                 }
 468               vectype = get_vectype_for_scalar_type (scalar_type);
 469               if (!vectype)
 470                 {
 471                   if (dump_enabled_p ())
 472                     {
 473                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 474                                        "not vectorized: unsupported "
 475                                        "data-type ");
 476                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 477                                          scalar_type);
 478                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 479                     }
 480                   return false;
 481                 }
 482
 483               if (!bool_result)
 484                 STMT_VINFO_VECTYPE (stmt_info) = vectype;
 485
 486               if (dump_enabled_p ())
 487                 {
 488                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 489                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 490                   dump_printf (MSG_NOTE, "\n");
 491                 }
 492             }
 493
 494           /* Don't try to compute VF out scalar types if we stmt
 495              produces boolean vector.  Use result vectype instead.  */
 496           if (VECTOR_BOOLEAN_TYPE_P (vectype))
 497             vf_vectype = vectype;
 498           else
 499             {
 500               /* The vectorization factor is according to the smallest
 501                  scalar type (or the largest vector size, but we only
 502                  support one vector size per loop).  */
 503               if (!bool_result)
 504                 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 505                                                              &dummy);
 506               if (dump_enabled_p ())
 507                 {
 508                   dump_printf_loc (MSG_NOTE, vect_location,
 509                                    "get vectype for scalar type:  ");
 510                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 511                   dump_printf (MSG_NOTE, "\n");
 512                 }
 513               vf_vectype = get_vectype_for_scalar_type (scalar_type);
 514             }
 515           if (!vf_vectype)
 516             {
 517               if (dump_enabled_p ())
 518                 {
 519                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 520                                    "not vectorized: unsupported data-type ");
 521                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 522                                      scalar_type);
 523                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 524                 }
 525               return false;
 526             }
 527
 528           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 529                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 530             {
 531               if (dump_enabled_p ())
 532                 {
 533                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 534                                    "not vectorized: different sized vector "
 535                                    "types in statement, ");
 536                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 537                                      vectype);
 538                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 539                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 540                                      vf_vectype);
 541                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 542                 }
 543               return false;
 544             }
 545
 546           if (dump_enabled_p ())
 547             {
 548               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 549               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 550               dump_printf (MSG_NOTE, "\n");
 551             }
 552
 553           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 554           if (dump_enabled_p ())
 555             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
 556           if (!vectorization_factor
 557               || (nunits > vectorization_factor))
 558             vectorization_factor = nunits;
 559
 560           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 561             {
 562               pattern_def_seq = NULL;
 563               gsi_next (&si);
 564             }
 565         }
 566     }
 567
 568   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 569   if (dump_enabled_p ())
 570     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
 571                      vectorization_factor);
 572   if (vectorization_factor <= 1)
 573     {
 574       if (dump_enabled_p ())
 575         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 576                          "not vectorized: unsupported data-type\n");
 577       return false;
 578     }
 579   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 580
 581   for (i = 0; i < mask_producers.length (); i++)
 582     {
 583       tree mask_type = NULL;
 584
 585       stmt = STMT_VINFO_STMT (mask_producers[i]);
 586
 587       if (gimple_code (stmt) == GIMPLE_ASSIGN
 588           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
 589           && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
 590         {
 591           scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
 592           mask_type = get_mask_type_for_scalar_type (scalar_type);
 593
 594           if (!mask_type)
 595             {
 596               if (dump_enabled_p ())
 597                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 598                                  "not vectorized: unsupported mask\n");
 599               return false;
 600             }
 601         }
 602       else
 603         {
 604           tree rhs;
 605           ssa_op_iter iter;
 606           gimple *def_stmt;
 607           enum vect_def_type dt;
 608
 609           FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
 610             {
 611               if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
 612                                        &def_stmt, &dt, &vectype))
 613                 {
 614                   if (dump_enabled_p ())
 615                     {
 616                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 617                                        "not vectorized: can't compute mask type "
 618                                        "for statement, ");
 619                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 620                                         0);
 621                     }
 622                   return false;
 623                 }
 624
 625               /* No vectype probably means external definition.
 626                  Allow it in case there is another operand which
 627                  allows to determine mask type.  */
 628               if (!vectype)
 629                 continue;
 630
 631               if (!mask_type)
 632                 mask_type = vectype;
 633               else if (TYPE_VECTOR_SUBPARTS (mask_type)
 634                        != TYPE_VECTOR_SUBPARTS (vectype))
 635                 {
 636                   if (dump_enabled_p ())
 637                     {
 638                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 639                                        "not vectorized: different sized masks "
 640                                        "types in statement, ");
 641                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 642                                          mask_type);
 643                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 644                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 645                                          vectype);
 646                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 647                     }
 648                   return false;
 649                 }
 650               else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
 651                        != VECTOR_BOOLEAN_TYPE_P (vectype))
 652                 {
 653                   if (dump_enabled_p ())
 654                     {
 655                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 656                                        "not vectorized: mixed mask and "
 657                                        "nonmask vector types in statement, ");
 658                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 659                                          mask_type);
 660                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 661                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 662                                          vectype);
 663                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 664                     }
 665                   return false;
 666                 }
 667             }
 668
 669           /* We may compare boolean value loaded as vector of integers.
 670              Fix mask_type in such case.  */
 671           if (mask_type
 672               && !VECTOR_BOOLEAN_TYPE_P (mask_type)
 673               && gimple_code (stmt) == GIMPLE_ASSIGN
 674               && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
 675             mask_type = build_same_sized_truth_vector_type (mask_type);
 676         }
 677
 678       /* No mask_type should mean loop invariant predicate.
 679          This is probably a subject for optimization in
 680          if-conversion.  */
 681       if (!mask_type)
 682         {
 683           if (dump_enabled_p ())
 684             {
 685               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 686                                "not vectorized: can't compute mask type "
 687                                "for statement, ");
 688               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 689                                 0);
 690             }
 691           return false;
 692         }
 693
 694       STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
 695     }
 696
 697   return true;
 698 }
 699
 700
 701 /* Function vect_is_simple_iv_evolution.
 702
 703    FORNOW: A simple evolution of an induction variables in the loop is
 704    considered a polynomial evolution.  */
 705
 706 static bool
 707 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 708                              tree * step)
 709 {
 710   tree init_expr;
 711   tree step_expr;
 712   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 713   basic_block bb;
 714
 715   /* When there is no evolution in this loop, the evolution function
 716      is not "simple".  */
 717   if (evolution_part == NULL_TREE)
 718     return false;
 719
 720   /* When the evolution is a polynomial of degree >= 2
 721      the evolution function is not "simple".  */
 722   if (tree_is_chrec (evolution_part))
 723     return false;
 724
 725   step_expr = evolution_part;
 726   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 727
 728   if (dump_enabled_p ())
 729     {
 730       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 731       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 732       dump_printf (MSG_NOTE, ",  init: ");
 733       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 734       dump_printf (MSG_NOTE, "\n");
 735     }
 736
 737   *init = init_expr;
 738   *step = step_expr;
 739
 740   if (TREE_CODE (step_expr) != INTEGER_CST
 741       && (TREE_CODE (step_expr) != SSA_NAME
 742           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 743               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 744           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 745               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 746                   || !flag_associative_math)))
 747       && (TREE_CODE (step_expr) != REAL_CST
 748           || !flag_associative_math))
 749     {
 750       if (dump_enabled_p ())
 751         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 752                          "step unknown.\n");
 753       return false;
 754     }
 755
 756   return true;
 757 }
 758
 759 /* Function vect_analyze_scalar_cycles_1.
 760
 761    Examine the cross iteration def-use cycles of scalar variables
 762    in LOOP.  LOOP_VINFO represents the loop that is now being
 763    considered for vectorization (can be LOOP, or an outer-loop
 764    enclosing LOOP).  */
 765
 766 static void
 767 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 768 {
 769   basic_block bb = loop->header;
 770   tree init, step;
 771   auto_vec<gimple *, 64> worklist;
 772   gphi_iterator gsi;
 773   bool double_reduc;
 774
 775   if (dump_enabled_p ())
 776     dump_printf_loc (MSG_NOTE, vect_location,
 777                      "=== vect_analyze_scalar_cycles ===\n");
 778
 779   /* First - identify all inductions.  Reduction detection assumes that all the
 780      inductions have been identified, therefore, this order must not be
 781      changed.  */
 782   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 783     {
 784       gphi *phi = gsi.phi ();
 785       tree access_fn = NULL;
 786       tree def = PHI_RESULT (phi);
 787       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 788
 789       if (dump_enabled_p ())
 790         {
 791           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 792           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 793         }
 794
 795       /* Skip virtual phi's.  The data dependences that are associated with
 796          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 797       if (virtual_operand_p (def))
 798         continue;
 799
 800       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 801
 802       /* Analyze the evolution function.  */
 803       access_fn = analyze_scalar_evolution (loop, def);
 804       if (access_fn)
 805         {
 806           STRIP_NOPS (access_fn);
 807           if (dump_enabled_p ())
 808             {
 809               dump_printf_loc (MSG_NOTE, vect_location,
 810                                "Access function of PHI: ");
 811               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 812               dump_printf (MSG_NOTE, "\n");
 813             }
 814           STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 815             = initial_condition_in_loop_num (access_fn, loop->num);
 816           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 817             = evolution_part_in_loop_num (access_fn, loop->num);
 818         }
 819
 820       if (!access_fn
 821           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 822           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 823               && TREE_CODE (step) != INTEGER_CST))
 824         {
 825           worklist.safe_push (phi);
 826           continue;
 827         }
 828
 829       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 830                   != NULL_TREE);
 831       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 832
 833       if (dump_enabled_p ())
 834         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 835       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 836     }
 837
 838
 839   /* Second - identify all reductions and nested cycles.  */
 840   while (worklist.length () > 0)
 841     {
 842       gimple *phi = worklist.pop ();
 843       tree def = PHI_RESULT (phi);
 844       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 845       gimple *reduc_stmt;
 846       bool nested_cycle;
 847
 848       if (dump_enabled_p ())
 849         {
 850           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 851           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 852         }
 853
 854       gcc_assert (!virtual_operand_p (def)
 855                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 856
 857       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 858       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 859                                                 &double_reduc, false);
 860       if (reduc_stmt)
 861         {
 862           if (double_reduc)
 863             {
 864               if (dump_enabled_p ())
 865                 dump_printf_loc (MSG_NOTE, vect_location,
 866                                  "Detected double reduction.\n");
 867
 868               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 869               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 870                                                     vect_double_reduction_def;
 871             }
 872           else
 873             {
 874               if (nested_cycle)
 875                 {
 876                   if (dump_enabled_p ())
 877                     dump_printf_loc (MSG_NOTE, vect_location,
 878                                      "Detected vectorizable nested cycle.\n");
 879
 880                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 881                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 882                                                              vect_nested_cycle;
 883                 }
 884               else
 885                 {
 886                   if (dump_enabled_p ())
 887                     dump_printf_loc (MSG_NOTE, vect_location,
 888                                      "Detected reduction.\n");
 889
 890                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 891                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 892                                                            vect_reduction_def;
 893                   /* Store the reduction cycles for possible vectorization in
 894                      loop-aware SLP.  */
 895                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 896                 }
 897             }
 898         }
 899       else
 900         if (dump_enabled_p ())
 901           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 902                            "Unknown def-use cycle pattern.\n");
 903     }
 904 }
 905
 906
 907 /* Function vect_analyze_scalar_cycles.
 908
 909    Examine the cross iteration def-use cycles of scalar variables, by
 910    analyzing the loop-header PHIs of scalar variables.  Classify each
 911    cycle as one of the following: invariant, induction, reduction, unknown.
 912    We do that for the loop represented by LOOP_VINFO, and also to its
 913    inner-loop, if exists.
 914    Examples for scalar cycles:
 915
 916    Example1: reduction:
 917
 918               loop1:
 919               for (i=0; i<N; i++)
 920                  sum += a[i];
 921
 922    Example2: induction:
 923
 924               loop2:
 925               for (i=0; i<N; i++)
 926                  a[i] = i;  */
 927
 928 static void
 929 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 930 {
 931   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 932
 933   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 934
 935   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 936      Reductions in such inner-loop therefore have different properties than
 937      the reductions in the nest that gets vectorized:
 938      1. When vectorized, they are executed in the same order as in the original
 939         scalar loop, so we can't change the order of computation when
 940         vectorizing them.
 941      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 942         current checks are too strict.  */
 943
 944   if (loop->inner)
 945     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 946 }
 947
 948 /* Transfer group and reduction information from STMT to its pattern stmt.  */
 949
 950 static void
 951 vect_fixup_reduc_chain (gimple *stmt)
 952 {
 953   gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 954   gimple *stmtp;
 955   gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
 956               && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
 957   GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
 958   do
 959     {
 960       stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 961       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
 962       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
 963       if (stmt)
 964         GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
 965           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 966     }
 967   while (stmt);
 968   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
 969 }
 970
 971 /* Fixup scalar cycles that now have their stmts detected as patterns.  */
 972
 973 static void
 974 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
 975 {
 976   gimple *first;
 977   unsigned i;
 978
 979   FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
 980     if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
 981       {
 982         gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
 983         while (next)
 984           {
 985             if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
 986               break;
 987             next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
 988           }
 989         /* If not all stmt in the chain are patterns try to handle
 990            the chain without patterns.  */
 991         if (! next)
 992           {
 993             vect_fixup_reduc_chain (first);
 994             LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
 995               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
 996           }
 997       }
 998 }
 999
1000 /* Function vect_get_loop_niters.
1001
1002    Determine how many iterations the loop is executed and place it
1003    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
1004    in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
1005    niter information holds in ASSUMPTIONS.
1006
1007    Return the loop exit condition.  */
1008
1009
1010 static gcond *
1011 vect_get_loop_niters (struct loop *loop, tree *assumptions,
1012                       tree *number_of_iterations, tree *number_of_iterationsm1)
1013 {
1014   edge exit = single_exit (loop);
1015   struct tree_niter_desc niter_desc;
1016   tree niter_assumptions, niter, may_be_zero;
1017   gcond *cond = get_loop_exit_condition (loop);
1018
1019   *assumptions = boolean_true_node;
1020   *number_of_iterationsm1 = chrec_dont_know;
1021   *number_of_iterations = chrec_dont_know;
1022   if (dump_enabled_p ())
1023     dump_printf_loc (MSG_NOTE, vect_location,
1024                      "=== get_loop_niters ===\n");
1025
1026   if (!exit)
1027     return cond;
1028
1029   niter = chrec_dont_know;
1030   may_be_zero = NULL_TREE;
1031   niter_assumptions = boolean_true_node;
1032   if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
1033       || chrec_contains_undetermined (niter_desc.niter))
1034     return cond;
1035
1036   niter_assumptions = niter_desc.assumptions;
1037   may_be_zero = niter_desc.may_be_zero;
1038   niter = niter_desc.niter;
1039
1040   if (may_be_zero && integer_zerop (may_be_zero))
1041     may_be_zero = NULL_TREE;
1042
1043   if (may_be_zero)
1044     {
1045       if (COMPARISON_CLASS_P (may_be_zero))
1046         {
1047           /* Try to combine may_be_zero with assumptions, this can simplify
1048              computation of niter expression.  */
1049           if (niter_assumptions && !integer_nonzerop (niter_assumptions))
1050             niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1051                                              niter_assumptions,
1052                                              fold_build1 (TRUTH_NOT_EXPR,
1053                                                           boolean_type_node,
1054                                                           may_be_zero));
1055           else
1056             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
1057                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1058
1059           may_be_zero = NULL_TREE;
1060         }
1061       else if (integer_nonzerop (may_be_zero))
1062         {
1063           *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
1064           *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
1065           return cond;
1066         }
1067       else
1068         return cond;
1069     }
1070
1071   *assumptions = niter_assumptions;
1072   *number_of_iterationsm1 = niter;
1073
1074   /* We want the number of loop header executions which is the number
1075      of latch executions plus one.
1076      ???  For UINT_MAX latch executions this number overflows to zero
1077      for loops like do { n++; } while (n != 0);  */
1078   if (niter && !chrec_contains_undetermined (niter))
1079     niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
1080                           build_int_cst (TREE_TYPE (niter), 1));
1081   *number_of_iterations = niter;
1082
1083   return cond;
1084 }
1085
1086 /* Function bb_in_loop_p
1087
1088    Used as predicate for dfs order traversal of the loop bbs.  */
1089
1090 static bool
1091 bb_in_loop_p (const_basic_block bb, const void *data)
1092 {
1093   const struct loop *const loop = (const struct loop *)data;
1094   if (flow_bb_inside_loop_p (loop, bb))
1095     return true;
1096   return false;
1097 }
1098
1099
1100 /* Function new_loop_vec_info.
1101
1102    Create and initialize a new loop_vec_info struct for LOOP, as well as
1103    stmt_vec_info structs for all the stmts in LOOP.  */
1104
1105 static loop_vec_info
1106 new_loop_vec_info (struct loop *loop)
1107 {
1108   loop_vec_info res;
1109   basic_block *bbs;
1110   gimple_stmt_iterator si;
1111   unsigned int i, nbbs;
1112
1113   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1114   res->kind = vec_info::loop;
1115   LOOP_VINFO_LOOP (res) = loop;
1116
1117   bbs = get_loop_body (loop);
1118
1119   /* Create/Update stmt_info for all stmts in the loop.  */
1120   for (i = 0; i < loop->num_nodes; i++)
1121     {
1122       basic_block bb = bbs[i];
1123
1124       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1125         {
1126           gimple *phi = gsi_stmt (si);
1127           gimple_set_uid (phi, 0);
1128           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1129         }
1130
1131       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1132         {
1133           gimple *stmt = gsi_stmt (si);
1134           gimple_set_uid (stmt, 0);
1135           set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1136         }
1137     }
1138
1139   /* CHECKME: We want to visit all BBs before their successors (except for
1140      latch blocks, for which this assertion wouldn't hold).  In the simple
1141      case of the loop forms we allow, a dfs order of the BBs would the same
1142      as reversed postorder traversal, so we are safe.  */
1143
1144    free (bbs);
1145    bbs = XCNEWVEC (basic_block, loop->num_nodes);
1146    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1147                               bbs, loop->num_nodes, loop);
1148    gcc_assert (nbbs == loop->num_nodes);
1149
1150   LOOP_VINFO_BBS (res) = bbs;
1151   LOOP_VINFO_NITERSM1 (res) = NULL;
1152   LOOP_VINFO_NITERS (res) = NULL;
1153   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1154   LOOP_VINFO_NITERS_ASSUMPTIONS (res) = NULL;
1155   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1156   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1157   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1158   LOOP_VINFO_VECT_FACTOR (res) = 0;
1159   LOOP_VINFO_LOOP_NEST (res) = vNULL;
1160   LOOP_VINFO_DATAREFS (res) = vNULL;
1161   LOOP_VINFO_DDRS (res) = vNULL;
1162   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1163   LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1164   LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1165   LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1166   LOOP_VINFO_REDUCTIONS (res) = vNULL;
1167   LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1168   LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1169   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1170   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1171   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1172   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1173   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1174
1175   return res;
1176 }
1177
1178
1179 /* Function destroy_loop_vec_info.
1180
1181    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1182    stmts in the loop.  */
1183
1184 void
1185 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1186 {
1187   struct loop *loop;
1188   basic_block *bbs;
1189   int nbbs;
1190   gimple_stmt_iterator si;
1191   int j;
1192   vec<slp_instance> slp_instances;
1193   slp_instance instance;
1194   bool swapped;
1195
1196   if (!loop_vinfo)
1197     return;
1198
1199   loop = LOOP_VINFO_LOOP (loop_vinfo);
1200
1201   bbs = LOOP_VINFO_BBS (loop_vinfo);
1202   nbbs = clean_stmts ? loop->num_nodes : 0;
1203   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1204
1205   for (j = 0; j < nbbs; j++)
1206     {
1207       basic_block bb = bbs[j];
1208       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1209         free_stmt_vec_info (gsi_stmt (si));
1210
1211       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1212         {
1213           gimple *stmt = gsi_stmt (si);
1214
1215           /* We may have broken canonical form by moving a constant
1216              into RHS1 of a commutative op.  Fix such occurrences.  */
1217           if (swapped && is_gimple_assign (stmt))
1218             {
1219               enum tree_code code = gimple_assign_rhs_code (stmt);
1220
1221               if ((code == PLUS_EXPR
1222                    || code == POINTER_PLUS_EXPR
1223                    || code == MULT_EXPR)
1224                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1225                 swap_ssa_operands (stmt,
1226                                    gimple_assign_rhs1_ptr (stmt),
1227                                    gimple_assign_rhs2_ptr (stmt));
1228               else if (code == COND_EXPR
1229                        && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
1230                 {
1231                   tree cond_expr = gimple_assign_rhs1 (stmt);
1232                   enum tree_code cond_code = TREE_CODE (cond_expr);
1233
1234                   if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1235                     {
1236                       bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr,
1237                                                                   0));
1238                       cond_code = invert_tree_comparison (cond_code,
1239                                                           honor_nans);
1240                       if (cond_code != ERROR_MARK)
1241                         {
1242                           TREE_SET_CODE (cond_expr, cond_code);
1243                           swap_ssa_operands (stmt,
1244                                              gimple_assign_rhs2_ptr (stmt),
1245                                              gimple_assign_rhs3_ptr (stmt));
1246                         }
1247                     }
1248                 }
1249             }
1250
1251           /* Free stmt_vec_info.  */
1252           free_stmt_vec_info (stmt);
1253           gsi_next (&si);
1254         }
1255     }
1256
1257   free (LOOP_VINFO_BBS (loop_vinfo));
1258   vect_destroy_datarefs (loop_vinfo);
1259   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1260   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1261   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1262   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
1263   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1264   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1265   FOR_EACH_VEC_ELT (slp_instances, j, instance)
1266     vect_free_slp_instance (instance);
1267
1268   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1269   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1270   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1271   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1272
1273   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1274   loop_vinfo->scalar_cost_vec.release ();
1275
1276   free (loop_vinfo);
1277   loop->aux = NULL;
1278 }
1279
1280
1281 /* Calculate the cost of one scalar iteration of the loop.  */
1282 static void
1283 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1284 {
1285   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1286   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1287   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1288   int innerloop_iters, i;
1289
1290   /* Count statements in scalar loop.  Using this as scalar cost for a single
1291      iteration for now.
1292
1293      TODO: Add outer loop support.
1294
1295      TODO: Consider assigning different costs to different scalar
1296      statements.  */
1297
1298   /* FORNOW.  */
1299   innerloop_iters = 1;
1300   if (loop->inner)
1301     innerloop_iters = 50; /* FIXME */
1302
1303   for (i = 0; i < nbbs; i++)
1304     {
1305       gimple_stmt_iterator si;
1306       basic_block bb = bbs[i];
1307
1308       if (bb->loop_father == loop->inner)
1309         factor = innerloop_iters;
1310       else
1311         factor = 1;
1312
1313       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1314         {
1315           gimple *stmt = gsi_stmt (si);
1316           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1317
1318           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1319             continue;
1320
1321           /* Skip stmts that are not vectorized inside the loop.  */
1322           if (stmt_info
1323               && !STMT_VINFO_RELEVANT_P (stmt_info)
1324               && (!STMT_VINFO_LIVE_P (stmt_info)
1325                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1326               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1327             continue;
1328
1329           vect_cost_for_stmt kind;
1330           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1331             {
1332               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1333                kind = scalar_load;
1334              else
1335                kind = scalar_store;
1336             }
1337           else
1338             kind = scalar_stmt;
1339
1340           scalar_single_iter_cost
1341             += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1342                                  factor, kind, NULL, 0, vect_prologue);
1343         }
1344     }
1345   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1346     = scalar_single_iter_cost;
1347 }
1348
1349
1350 /* Function vect_analyze_loop_form_1.
1351
1352    Verify that certain CFG restrictions hold, including:
1353    - the loop has a pre-header
1354    - the loop has a single entry and exit
1355    - the loop exit condition is simple enough
1356    - the number of iterations can be analyzed, i.e, a countable loop.  The
1357      niter could be analyzed under some assumptions.  */
1358
1359 bool
1360 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1361                           tree *assumptions, tree *number_of_iterationsm1,
1362                           tree *number_of_iterations, gcond **inner_loop_cond)
1363 {
1364   if (dump_enabled_p ())
1365     dump_printf_loc (MSG_NOTE, vect_location,
1366                      "=== vect_analyze_loop_form ===\n");
1367
1368   /* Different restrictions apply when we are considering an inner-most loop,
1369      vs. an outer (nested) loop.
1370      (FORNOW. May want to relax some of these restrictions in the future).  */
1371
1372   if (!loop->inner)
1373     {
1374       /* Inner-most loop.  We currently require that the number of BBs is
1375          exactly 2 (the header and latch).  Vectorizable inner-most loops
1376          look like this:
1377
1378                         (pre-header)
1379                            |
1380                           header <--------+
1381                            | |            |
1382                            | +--> latch --+
1383                            |
1384                         (exit-bb)  */
1385
1386       if (loop->num_nodes != 2)
1387         {
1388           if (dump_enabled_p ())
1389             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1390                              "not vectorized: control flow in loop.\n");
1391           return false;
1392         }
1393
1394       if (empty_block_p (loop->header))
1395         {
1396           if (dump_enabled_p ())
1397             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1398                              "not vectorized: empty loop.\n");
1399           return false;
1400         }
1401     }
1402   else
1403     {
1404       struct loop *innerloop = loop->inner;
1405       edge entryedge;
1406
1407       /* Nested loop. We currently require that the loop is doubly-nested,
1408          contains a single inner loop, and the number of BBs is exactly 5.
1409          Vectorizable outer-loops look like this:
1410
1411                         (pre-header)
1412                            |
1413                           header <---+
1414                            |         |
1415                           inner-loop |
1416                            |         |
1417                           tail ------+
1418                            |
1419                         (exit-bb)
1420
1421          The inner-loop has the properties expected of inner-most loops
1422          as described above.  */
1423
1424       if ((loop->inner)->inner || (loop->inner)->next)
1425         {
1426           if (dump_enabled_p ())
1427             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1428                              "not vectorized: multiple nested loops.\n");
1429           return false;
1430         }
1431
1432       if (loop->num_nodes != 5)
1433         {
1434           if (dump_enabled_p ())
1435             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1436                              "not vectorized: control flow in loop.\n");
1437           return false;
1438         }
1439
1440       entryedge = loop_preheader_edge (innerloop);
1441       if (entryedge->src != loop->header
1442           || !single_exit (innerloop)
1443           || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1444         {
1445           if (dump_enabled_p ())
1446             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1447                              "not vectorized: unsupported outerloop form.\n");
1448           return false;
1449         }
1450
1451       /* Analyze the inner-loop.  */
1452       tree inner_niterm1, inner_niter, inner_assumptions;
1453       if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1454                                       &inner_assumptions, &inner_niterm1,
1455                                       &inner_niter, NULL)
1456           /* Don't support analyzing niter under assumptions for inner
1457              loop.  */
1458           || !integer_onep (inner_assumptions))
1459         {
1460           if (dump_enabled_p ())
1461             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1462                              "not vectorized: Bad inner loop.\n");
1463           return false;
1464         }
1465
1466       if (!expr_invariant_in_loop_p (loop, inner_niter))
1467         {
1468           if (dump_enabled_p ())
1469             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1470                              "not vectorized: inner-loop count not"
1471                              " invariant.\n");
1472           return false;
1473         }
1474
1475       if (dump_enabled_p ())
1476         dump_printf_loc (MSG_NOTE, vect_location,
1477                          "Considering outer-loop vectorization.\n");
1478     }
1479
1480   if (!single_exit (loop)
1481       || EDGE_COUNT (loop->header->preds) != 2)
1482     {
1483       if (dump_enabled_p ())
1484         {
1485           if (!single_exit (loop))
1486             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1487                              "not vectorized: multiple exits.\n");
1488           else if (EDGE_COUNT (loop->header->preds) != 2)
1489             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1490                              "not vectorized: too many incoming edges.\n");
1491         }
1492       return false;
1493     }
1494
1495   /* We assume that the loop exit condition is at the end of the loop. i.e,
1496      that the loop is represented as a do-while (with a proper if-guard
1497      before the loop if needed), where the loop header contains all the
1498      executable statements, and the latch is empty.  */
1499   if (!empty_block_p (loop->latch)
1500       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1501     {
1502       if (dump_enabled_p ())
1503         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1504                          "not vectorized: latch block not empty.\n");
1505       return false;
1506     }
1507
1508   /* Make sure the exit is not abnormal.  */
1509   edge e = single_exit (loop);
1510   if (e->flags & EDGE_ABNORMAL)
1511     {
1512       if (dump_enabled_p ())
1513         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1514                          "not vectorized: abnormal loop exit edge.\n");
1515       return false;
1516     }
1517
1518   *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
1519                                      number_of_iterationsm1);
1520   if (!*loop_cond)
1521     {
1522       if (dump_enabled_p ())
1523         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1524                          "not vectorized: complicated exit condition.\n");
1525       return false;
1526     }
1527
1528   if (integer_zerop (*assumptions)
1529       || !*number_of_iterations
1530       || chrec_contains_undetermined (*number_of_iterations))
1531     {
1532       if (dump_enabled_p ())
1533         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1534                          "not vectorized: number of iterations cannot be "
1535                          "computed.\n");
1536       return false;
1537     }
1538
1539   if (integer_zerop (*number_of_iterations))
1540     {
1541       if (dump_enabled_p ())
1542         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1543                          "not vectorized: number of iterations = 0.\n");
1544       return false;
1545     }
1546
1547   return true;
1548 }
1549
1550 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form.  */
1551
1552 loop_vec_info
1553 vect_analyze_loop_form (struct loop *loop)
1554 {
1555   tree assumptions, number_of_iterations, number_of_iterationsm1;
1556   gcond *loop_cond, *inner_loop_cond = NULL;
1557
1558   if (! vect_analyze_loop_form_1 (loop, &loop_cond,
1559                                   &assumptions, &number_of_iterationsm1,
1560                                   &number_of_iterations, &inner_loop_cond))
1561     return NULL;
1562
1563   loop_vec_info loop_vinfo = new_loop_vec_info (loop);
1564   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1565   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1566   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1567   if (!integer_onep (assumptions))
1568     {
1569       /* We consider to vectorize this loop by versioning it under
1570          some assumptions.  In order to do this, we need to clear
1571          existing information computed by scev and niter analyzer.  */
1572       scev_reset_htab ();
1573       free_numbers_of_iterations_estimates_loop (loop);
1574       /* Also set flag for this loop so that following scev and niter
1575          analysis are done under the assumptions.  */
1576       loop_constraint_set (loop, LOOP_C_FINITE);
1577       /* Also record the assumptions for versioning.  */
1578       LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = assumptions;
1579     }
1580
1581   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1582     {
1583       if (dump_enabled_p ())
1584         {
1585           dump_printf_loc (MSG_NOTE, vect_location,
1586                            "Symbolic number of iterations is ");
1587           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1588           dump_printf (MSG_NOTE, "\n");
1589         }
1590     }
1591
1592   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1593   if (inner_loop_cond)
1594     STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond))
1595       = loop_exit_ctrl_vec_info_type;
1596
1597   gcc_assert (!loop->aux);
1598   loop->aux = loop_vinfo;
1599   return loop_vinfo;
1600 }
1601
1602
1603
1604 /* Scan the loop stmts and dependent on whether there are any (non-)SLP
1605    statements update the vectorization factor.  */
1606
1607 static void
1608 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1609 {
1610   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1611   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1612   int nbbs = loop->num_nodes;
1613   unsigned int vectorization_factor;
1614   int i;
1615
1616   if (dump_enabled_p ())
1617     dump_printf_loc (MSG_NOTE, vect_location,
1618                      "=== vect_update_vf_for_slp ===\n");
1619
1620   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1621   gcc_assert (vectorization_factor != 0);
1622
1623   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1624      vectorization factor of the loop is the unrolling factor required by
1625      the SLP instances.  If that unrolling factor is 1, we say, that we
1626      perform pure SLP on loop - cross iteration parallelism is not
1627      exploited.  */
1628   bool only_slp_in_loop = true;
1629   for (i = 0; i < nbbs; i++)
1630     {
1631       basic_block bb = bbs[i];
1632       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1633            gsi_next (&si))
1634         {
1635           gimple *stmt = gsi_stmt (si);
1636           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1637           if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1638               && STMT_VINFO_RELATED_STMT (stmt_info))
1639             {
1640               stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1641               stmt_info = vinfo_for_stmt (stmt);
1642             }
1643           if ((STMT_VINFO_RELEVANT_P (stmt_info)
1644                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1645               && !PURE_SLP_STMT (stmt_info))
1646             /* STMT needs both SLP and loop-based vectorization.  */
1647             only_slp_in_loop = false;
1648         }
1649     }
1650
1651   if (only_slp_in_loop)
1652     vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1653   else
1654     vectorization_factor
1655       = least_common_multiple (vectorization_factor,
1656                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1657
1658   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1659   if (dump_enabled_p ())
1660     dump_printf_loc (MSG_NOTE, vect_location,
1661                      "Updating vectorization factor to %d\n",
1662                      vectorization_factor);
1663 }
1664
1665 /* Function vect_analyze_loop_operations.
1666
1667    Scan the loop stmts and make sure they are all vectorizable.  */
1668
1669 static bool
1670 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1671 {
1672   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1673   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1674   int nbbs = loop->num_nodes;
1675   int i;
1676   stmt_vec_info stmt_info;
1677   bool need_to_vectorize = false;
1678   bool ok;
1679
1680   if (dump_enabled_p ())
1681     dump_printf_loc (MSG_NOTE, vect_location,
1682                      "=== vect_analyze_loop_operations ===\n");
1683
1684   for (i = 0; i < nbbs; i++)
1685     {
1686       basic_block bb = bbs[i];
1687
1688       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1689            gsi_next (&si))
1690         {
1691           gphi *phi = si.phi ();
1692           ok = true;
1693
1694           stmt_info = vinfo_for_stmt (phi);
1695           if (dump_enabled_p ())
1696             {
1697               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1698               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1699             }
1700           if (virtual_operand_p (gimple_phi_result (phi)))
1701             continue;
1702
1703           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1704              (i.e., a phi in the tail of the outer-loop).  */
1705           if (! is_loop_header_bb_p (bb))
1706             {
1707               /* FORNOW: we currently don't support the case that these phis
1708                  are not used in the outerloop (unless it is double reduction,
1709                  i.e., this phi is vect_reduction_def), cause this case
1710                  requires to actually do something here.  */
1711               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1712                    || STMT_VINFO_LIVE_P (stmt_info))
1713                   && STMT_VINFO_DEF_TYPE (stmt_info)
1714                      != vect_double_reduction_def)
1715                 {
1716                   if (dump_enabled_p ())
1717                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718                                      "Unsupported loop-closed phi in "
1719                                      "outer-loop.\n");
1720                   return false;
1721                 }
1722
1723               /* If PHI is used in the outer loop, we check that its operand
1724                  is defined in the inner loop.  */
1725               if (STMT_VINFO_RELEVANT_P (stmt_info))
1726                 {
1727                   tree phi_op;
1728                   gimple *op_def_stmt;
1729
1730                   if (gimple_phi_num_args (phi) != 1)
1731                     return false;
1732
1733                   phi_op = PHI_ARG_DEF (phi, 0);
1734                   if (TREE_CODE (phi_op) != SSA_NAME)
1735                     return false;
1736
1737                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1738                   if (gimple_nop_p (op_def_stmt)
1739                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1740                       || !vinfo_for_stmt (op_def_stmt))
1741                     return false;
1742
1743                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1744                         != vect_used_in_outer
1745                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1746                            != vect_used_in_outer_by_reduction)
1747                     return false;
1748                 }
1749
1750               continue;
1751             }
1752
1753           gcc_assert (stmt_info);
1754
1755           if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1756                || STMT_VINFO_LIVE_P (stmt_info))
1757               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1758             {
1759               /* A scalar-dependence cycle that we don't support.  */
1760               if (dump_enabled_p ())
1761                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1762                                  "not vectorized: scalar dependence cycle.\n");
1763               return false;
1764             }
1765
1766           if (STMT_VINFO_RELEVANT_P (stmt_info))
1767             {
1768               need_to_vectorize = true;
1769               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1770                 ok = vectorizable_induction (phi, NULL, NULL);
1771             }
1772
1773           if (ok && STMT_VINFO_LIVE_P (stmt_info))
1774             ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
1775
1776           if (!ok)
1777             {
1778               if (dump_enabled_p ())
1779                 {
1780                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781                                    "not vectorized: relevant phi not "
1782                                    "supported: ");
1783                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1784                 }
1785               return false;
1786             }
1787         }
1788
1789       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1790            gsi_next (&si))
1791         {
1792           gimple *stmt = gsi_stmt (si);
1793           if (!gimple_clobber_p (stmt)
1794               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1795             return false;
1796         }
1797     } /* bbs */
1798
1799   /* All operations in the loop are either irrelevant (deal with loop
1800      control, or dead), or only used outside the loop and can be moved
1801      out of the loop (e.g. invariants, inductions).  The loop can be
1802      optimized away by scalar optimizations.  We're better off not
1803      touching this loop.  */
1804   if (!need_to_vectorize)
1805     {
1806       if (dump_enabled_p ())
1807         dump_printf_loc (MSG_NOTE, vect_location,
1808                          "All the computation can be taken out of the loop.\n");
1809       if (dump_enabled_p ())
1810         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1811                          "not vectorized: redundant loop. no profit to "
1812                          "vectorize.\n");
1813       return false;
1814     }
1815
1816   return true;
1817 }
1818
1819
1820 /* Function vect_analyze_loop_2.
1821
1822    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1823    for it.  The different analyses will record information in the
1824    loop_vec_info struct.  */
1825 static bool
1826 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1827 {
1828   bool ok;
1829   int max_vf = MAX_VECTORIZATION_FACTOR;
1830   int min_vf = 2;
1831   unsigned int n_stmts = 0;
1832
1833   /* The first group of checks is independent of the vector size.  */
1834   fatal = true;
1835
1836   /* Find all data references in the loop (which correspond to vdefs/vuses)
1837      and analyze their evolution in the loop.  */
1838
1839   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1840
1841   loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1842   if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1843     {
1844       if (dump_enabled_p ())
1845         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846                          "not vectorized: loop nest containing two "
1847                          "or more consecutive inner loops cannot be "
1848                          "vectorized\n");
1849       return false;
1850     }
1851
1852   for (unsigned i = 0; i < loop->num_nodes; i++)
1853     for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1854          !gsi_end_p (gsi); gsi_next (&gsi))
1855       {
1856         gimple *stmt = gsi_stmt (gsi);
1857         if (is_gimple_debug (stmt))
1858           continue;
1859         ++n_stmts;
1860         if (!find_data_references_in_stmt (loop, stmt,
1861                                            &LOOP_VINFO_DATAREFS (loop_vinfo)))
1862           {
1863             if (is_gimple_call (stmt) && loop->safelen)
1864               {
1865                 tree fndecl = gimple_call_fndecl (stmt), op;
1866                 if (fndecl != NULL_TREE)
1867                   {
1868                     cgraph_node *node = cgraph_node::get (fndecl);
1869                     if (node != NULL && node->simd_clones != NULL)
1870                       {
1871                         unsigned int j, n = gimple_call_num_args (stmt);
1872                         for (j = 0; j < n; j++)
1873                           {
1874                             op = gimple_call_arg (stmt, j);
1875                             if (DECL_P (op)
1876                                 || (REFERENCE_CLASS_P (op)
1877                                     && get_base_address (op)))
1878                               break;
1879                           }
1880                         op = gimple_call_lhs (stmt);
1881                         /* Ignore #pragma omp declare simd functions
1882                            if they don't have data references in the
1883                            call stmt itself.  */
1884                         if (j == n
1885                             && !(op
1886                                  && (DECL_P (op)
1887                                      || (REFERENCE_CLASS_P (op)
1888                                          && get_base_address (op)))))
1889                           continue;
1890                       }
1891                   }
1892               }
1893             if (dump_enabled_p ())
1894               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1895                                "not vectorized: loop contains function "
1896                                "calls or data references that cannot "
1897                                "be analyzed\n");
1898             return false;
1899           }
1900       }
1901
1902   /* Analyze the data references and also adjust the minimal
1903      vectorization factor according to the loads and stores.  */
1904
1905   ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1906   if (!ok)
1907     {
1908       if (dump_enabled_p ())
1909         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1910                          "bad data references.\n");
1911       return false;
1912     }
1913
1914   /* Classify all cross-iteration scalar data-flow cycles.
1915      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1916   vect_analyze_scalar_cycles (loop_vinfo);
1917
1918   vect_pattern_recog (loop_vinfo);
1919
1920   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1921
1922   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1923      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1924
1925   ok = vect_analyze_data_ref_accesses (loop_vinfo);
1926   if (!ok)
1927     {
1928       if (dump_enabled_p ())
1929         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1930                          "bad data access.\n");
1931       return false;
1932     }
1933
1934   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1935
1936   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1937   if (!ok)
1938     {
1939       if (dump_enabled_p ())
1940         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941                          "unexpected pattern.\n");
1942       return false;
1943     }
1944
1945   /* While the rest of the analysis below depends on it in some way.  */
1946   fatal = false;
1947
1948   /* Analyze data dependences between the data-refs in the loop
1949      and adjust the maximum vectorization factor according to
1950      the dependences.
1951      FORNOW: fail at the first data dependence that we encounter.  */
1952
1953   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1954   if (!ok
1955       || max_vf < min_vf)
1956     {
1957       if (dump_enabled_p ())
1958             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1959                              "bad data dependence.\n");
1960       return false;
1961     }
1962
1963   ok = vect_determine_vectorization_factor (loop_vinfo);
1964   if (!ok)
1965     {
1966       if (dump_enabled_p ())
1967         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1968                          "can't determine vectorization factor.\n");
1969       return false;
1970     }
1971   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1972     {
1973       if (dump_enabled_p ())
1974         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1975                          "bad data dependence.\n");
1976       return false;
1977     }
1978
1979   /* Compute the scalar iteration cost.  */
1980   vect_compute_single_scalar_iteration_cost (loop_vinfo);
1981
1982   int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1983   HOST_WIDE_INT estimated_niter;
1984   unsigned th;
1985   int min_scalar_loop_bound;
1986
1987   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1988   ok = vect_analyze_slp (loop_vinfo, n_stmts);
1989   if (!ok)
1990     return false;
1991
1992   /* If there are any SLP instances mark them as pure_slp.  */
1993   bool slp = vect_make_slp_decision (loop_vinfo);
1994   if (slp)
1995     {
1996       /* Find stmts that need to be both vectorized and SLPed.  */
1997       vect_detect_hybrid_slp (loop_vinfo);
1998
1999       /* Update the vectorization factor based on the SLP decision.  */
2000       vect_update_vf_for_slp (loop_vinfo);
2001     }
2002
2003   /* This is the point where we can re-start analysis with SLP forced off.  */
2004 start_over:
2005
2006   /* Now the vectorization factor is final.  */
2007   unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2008   gcc_assert (vectorization_factor != 0);
2009
2010   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
2011     dump_printf_loc (MSG_NOTE, vect_location,
2012                      "vectorization_factor = %d, niters = "
2013                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
2014                      LOOP_VINFO_INT_NITERS (loop_vinfo));
2015
2016   HOST_WIDE_INT max_niter
2017     = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2018   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2019        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
2020       || (max_niter != -1
2021           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
2022     {
2023       if (dump_enabled_p ())
2024         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2025                          "not vectorized: iteration count smaller than "
2026                          "vectorization factor.\n");
2027       return false;
2028     }
2029
2030   /* Analyze the alignment of the data-refs in the loop.
2031      Fail if a data reference is found that cannot be vectorized.  */
2032
2033   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2034   if (!ok)
2035     {
2036       if (dump_enabled_p ())
2037         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2038                          "bad data alignment.\n");
2039       return false;
2040     }
2041
2042   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2043      It is important to call pruning after vect_analyze_data_ref_accesses,
2044      since we use grouping information gathered by interleaving analysis.  */
2045   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2046   if (!ok)
2047     return false;
2048
2049   /* This pass will decide on using loop versioning and/or loop peeling in
2050      order to enhance the alignment of data references in the loop.  */
2051   ok = vect_enhance_data_refs_alignment (loop_vinfo);
2052   if (!ok)
2053     {
2054       if (dump_enabled_p ())
2055         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2056                          "bad data alignment.\n");
2057       return false;
2058     }
2059
2060   if (slp)
2061     {
2062       /* Analyze operations in the SLP instances.  Note this may
2063          remove unsupported SLP instances which makes the above
2064          SLP kind detection invalid.  */
2065       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2066       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2067                                    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2068       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2069         goto again;
2070     }
2071
2072   /* Scan all the remaining operations in the loop that are not subject
2073      to SLP and make sure they are vectorizable.  */
2074   ok = vect_analyze_loop_operations (loop_vinfo);
2075   if (!ok)
2076     {
2077       if (dump_enabled_p ())
2078         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2079                          "bad operation or unsupported loop bound.\n");
2080       return false;
2081     }
2082
2083   /* If epilog loop is required because of data accesses with gaps,
2084      one additional iteration needs to be peeled.  Check if there is
2085      enough iterations for vectorization.  */
2086   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2087       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2088     {
2089       int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2090       tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2091
2092       if (wi::to_widest (scalar_niters) < vf)
2093         {
2094           if (dump_enabled_p ())
2095             dump_printf_loc (MSG_NOTE, vect_location,
2096                              "loop has no enough iterations to support"
2097                              " peeling for gaps.\n");
2098           return false;
2099         }
2100     }
2101
2102   /* Analyze cost.  Decide if worth while to vectorize.  */
2103   int min_profitable_estimate, min_profitable_iters;
2104   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2105                                       &min_profitable_estimate);
2106
2107   if (min_profitable_iters < 0)
2108     {
2109       if (dump_enabled_p ())
2110         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111                          "not vectorized: vectorization not profitable.\n");
2112       if (dump_enabled_p ())
2113         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2114                          "not vectorized: vector version will never be "
2115                          "profitable.\n");
2116       goto again;
2117     }
2118
2119   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2120                             * vectorization_factor) - 1);
2121
2122   /* Use the cost model only if it is more conservative than user specified
2123      threshold.  */
2124   th = (unsigned) min_scalar_loop_bound;
2125   if (min_profitable_iters
2126       && (!min_scalar_loop_bound
2127           || min_profitable_iters > min_scalar_loop_bound))
2128     th = (unsigned) min_profitable_iters;
2129
2130   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2131
2132   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2133       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2134     {
2135       if (dump_enabled_p ())
2136         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2137                          "not vectorized: vectorization not profitable.\n");
2138       if (dump_enabled_p ())
2139         dump_printf_loc (MSG_NOTE, vect_location,
2140                          "not vectorized: iteration count smaller than user "
2141                          "specified loop bound parameter or minimum profitable "
2142                          "iterations (whichever is more conservative).\n");
2143       goto again;
2144     }
2145
2146   estimated_niter
2147     = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2148   if (estimated_niter == -1)
2149     estimated_niter = max_niter;
2150   if (estimated_niter != -1
2151       && ((unsigned HOST_WIDE_INT) estimated_niter
2152           <= MAX (th, (unsigned)min_profitable_estimate)))
2153     {
2154       if (dump_enabled_p ())
2155         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2156                          "not vectorized: estimated iteration count too "
2157                          "small.\n");
2158       if (dump_enabled_p ())
2159         dump_printf_loc (MSG_NOTE, vect_location,
2160                          "not vectorized: estimated iteration count smaller "
2161                          "than specified loop bound parameter or minimum "
2162                          "profitable iterations (whichever is more "
2163                          "conservative).\n");
2164       goto again;
2165     }
2166
2167   /* Decide whether we need to create an epilogue loop to handle
2168      remaining scalar iterations.  */
2169   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2170         / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2171        * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2172
2173   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2174       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2175     {
2176       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2177                    - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2178           < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2179         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2180     }
2181   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2182            || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2183                < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2184                /* In case of versioning, check if the maximum number of
2185                   iterations is greater than th.  If they are identical,
2186                   the epilogue is unnecessary.  */
2187                && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2188                    || (unsigned HOST_WIDE_INT) max_niter > th)))
2189     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2190
2191   /* If an epilogue loop is required make sure we can create one.  */
2192   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2193       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2194     {
2195       if (dump_enabled_p ())
2196         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2197       if (!vect_can_advance_ivs_p (loop_vinfo)
2198           || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2199                                            single_exit (LOOP_VINFO_LOOP
2200                                                          (loop_vinfo))))
2201         {
2202           if (dump_enabled_p ())
2203             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204                              "not vectorized: can't create required "
2205                              "epilog loop\n");
2206           goto again;
2207         }
2208     }
2209
2210   gcc_assert (vectorization_factor
2211               == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2212
2213   /* Ok to vectorize!  */
2214   return true;
2215
2216 again:
2217   /* Try again with SLP forced off but if we didn't do any SLP there is
2218      no point in re-trying.  */
2219   if (!slp)
2220     return false;
2221
2222   /* If there are reduction chains re-trying will fail anyway.  */
2223   if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2224     return false;
2225
2226   /* Likewise if the grouped loads or stores in the SLP cannot be handled
2227      via interleaving or lane instructions.  */
2228   slp_instance instance;
2229   slp_tree node;
2230   unsigned i, j;
2231   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2232     {
2233       stmt_vec_info vinfo;
2234       vinfo = vinfo_for_stmt
2235           (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2236       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2237         continue;
2238       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2239       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2240       tree vectype = STMT_VINFO_VECTYPE (vinfo);
2241       if (! vect_store_lanes_supported (vectype, size)
2242           && ! vect_grouped_store_supported (vectype, size))
2243         return false;
2244       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2245         {
2246           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2247           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2248           bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
2249           size = STMT_VINFO_GROUP_SIZE (vinfo);
2250           vectype = STMT_VINFO_VECTYPE (vinfo);
2251           if (! vect_load_lanes_supported (vectype, size)
2252               && ! vect_grouped_load_supported (vectype, single_element_p,
2253                                                 size))
2254             return false;
2255         }
2256     }
2257
2258   if (dump_enabled_p ())
2259     dump_printf_loc (MSG_NOTE, vect_location,
2260                      "re-trying with SLP disabled\n");
2261
2262   /* Roll back state appropriately.  No SLP this time.  */
2263   slp = false;
2264   /* Restore vectorization factor as it were without SLP.  */
2265   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2266   /* Free the SLP instances.  */
2267   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2268     vect_free_slp_instance (instance);
2269   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2270   /* Reset SLP type to loop_vect on all stmts.  */
2271   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2272     {
2273       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2274       for (gimple_stmt_iterator si = gsi_start_bb (bb);
2275            !gsi_end_p (si); gsi_next (&si))
2276         {
2277           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2278           STMT_SLP_TYPE (stmt_info) = loop_vect;
2279           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2280             {
2281               stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2282               STMT_SLP_TYPE (stmt_info) = loop_vect;
2283               for (gimple_stmt_iterator pi
2284                      = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2285                    !gsi_end_p (pi); gsi_next (&pi))
2286                 {
2287                   gimple *pstmt = gsi_stmt (pi);
2288                   STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2289                 }
2290             }
2291         }
2292     }
2293   /* Free optimized alias test DDRS.  */
2294   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2295   /* Reset target cost data.  */
2296   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2297   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2298     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2299   /* Reset assorted flags.  */
2300   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2301   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2302   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2303
2304   goto start_over;
2305 }
2306
2307 /* Function vect_analyze_loop.
2308
2309    Apply a set of analyses on LOOP, and create a loop_vec_info struct
2310    for it.  The different analyses will record information in the
2311    loop_vec_info struct.  */
2312 loop_vec_info
2313 vect_analyze_loop (struct loop *loop)
2314 {
2315   loop_vec_info loop_vinfo;
2316   unsigned int vector_sizes;
2317
2318   /* Autodetect first vector size we try.  */
2319   current_vector_size = 0;
2320   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2321
2322   if (dump_enabled_p ())
2323     dump_printf_loc (MSG_NOTE, vect_location,
2324                      "===== analyze_loop_nest =====\n");
2325
2326   if (loop_outer (loop)
2327       && loop_vec_info_for_loop (loop_outer (loop))
2328       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2329     {
2330       if (dump_enabled_p ())
2331         dump_printf_loc (MSG_NOTE, vect_location,
2332                          "outer-loop already vectorized.\n");
2333       return NULL;
2334     }
2335
2336   while (1)
2337     {
2338       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
2339       loop_vinfo = vect_analyze_loop_form (loop);
2340       if (!loop_vinfo)
2341         {
2342           if (dump_enabled_p ())
2343             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2344                              "bad loop form.\n");
2345           return NULL;
2346         }
2347
2348       bool fatal = false;
2349       if (vect_analyze_loop_2 (loop_vinfo, fatal))
2350         {
2351           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2352
2353           return loop_vinfo;
2354         }
2355
2356       destroy_loop_vec_info (loop_vinfo, true);
2357
2358       vector_sizes &= ~current_vector_size;
2359       if (fatal
2360           || vector_sizes == 0
2361           || current_vector_size == 0)
2362         return NULL;
2363
2364       /* Try the next biggest vector size.  */
2365       current_vector_size = 1 << floor_log2 (vector_sizes);
2366       if (dump_enabled_p ())
2367         dump_printf_loc (MSG_NOTE, vect_location,
2368                          "***** Re-trying analysis with "
2369                          "vector size %d\n", current_vector_size);
2370     }
2371 }
2372
2373
2374 /* Function reduction_code_for_scalar_code
2375
2376    Input:
2377    CODE - tree_code of a reduction operations.
2378
2379    Output:
2380    REDUC_CODE - the corresponding tree-code to be used to reduce the
2381       vector of partial results into a single scalar result, or ERROR_MARK
2382       if the operation is a supported reduction operation, but does not have
2383       such a tree-code.
2384
2385    Return FALSE if CODE currently cannot be vectorized as reduction.  */
2386
2387 static bool
2388 reduction_code_for_scalar_code (enum tree_code code,
2389                                 enum tree_code *reduc_code)
2390 {
2391   switch (code)
2392     {
2393       case MAX_EXPR:
2394         *reduc_code = REDUC_MAX_EXPR;
2395         return true;
2396
2397       case MIN_EXPR:
2398         *reduc_code = REDUC_MIN_EXPR;
2399         return true;
2400
2401       case PLUS_EXPR:
2402         *reduc_code = REDUC_PLUS_EXPR;
2403         return true;
2404
2405       case MULT_EXPR:
2406       case MINUS_EXPR:
2407       case BIT_IOR_EXPR:
2408       case BIT_XOR_EXPR:
2409       case BIT_AND_EXPR:
2410         *reduc_code = ERROR_MARK;
2411         return true;
2412
2413       default:
2414        return false;
2415     }
2416 }
2417
2418
2419 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
2420    STMT is printed with a message MSG. */
2421
2422 static void
2423 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2424 {
2425   dump_printf_loc (msg_type, vect_location, "%s", msg);
2426   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2427 }
2428
2429
2430 /* Detect SLP reduction of the form:
2431
2432    #a1 = phi <a5, a0>
2433    a2 = operation (a1)
2434    a3 = operation (a2)
2435    a4 = operation (a3)
2436    a5 = operation (a4)
2437
2438    #a = phi <a5>
2439
2440    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2441    FIRST_STMT is the first reduction stmt in the chain
2442    (a2 = operation (a1)).
2443
2444    Return TRUE if a reduction chain was detected.  */
2445
2446 static bool
2447 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2448                        gimple *first_stmt)
2449 {
2450   struct loop *loop = (gimple_bb (phi))->loop_father;
2451   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2452   enum tree_code code;
2453   gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2454   stmt_vec_info use_stmt_info, current_stmt_info;
2455   tree lhs;
2456   imm_use_iterator imm_iter;
2457   use_operand_p use_p;
2458   int nloop_uses, size = 0, n_out_of_loop_uses;
2459   bool found = false;
2460
2461   if (loop != vect_loop)
2462     return false;
2463
2464   lhs = PHI_RESULT (phi);
2465   code = gimple_assign_rhs_code (first_stmt);
2466   while (1)
2467     {
2468       nloop_uses = 0;
2469       n_out_of_loop_uses = 0;
2470       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2471         {
2472           gimple *use_stmt = USE_STMT (use_p);
2473           if (is_gimple_debug (use_stmt))
2474             continue;
2475
2476           /* Check if we got back to the reduction phi.  */
2477           if (use_stmt == phi)
2478             {
2479               loop_use_stmt = use_stmt;
2480               found = true;
2481               break;
2482             }
2483
2484           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2485             {
2486               loop_use_stmt = use_stmt;
2487               nloop_uses++;
2488             }
2489            else
2490              n_out_of_loop_uses++;
2491
2492            /* There are can be either a single use in the loop or two uses in
2493               phi nodes.  */
2494            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2495              return false;
2496         }
2497
2498       if (found)
2499         break;
2500
2501       /* We reached a statement with no loop uses.  */
2502       if (nloop_uses == 0)
2503         return false;
2504
2505       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2506       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2507         return false;
2508
2509       if (!is_gimple_assign (loop_use_stmt)
2510           || code != gimple_assign_rhs_code (loop_use_stmt)
2511           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2512         return false;
2513
2514       /* Insert USE_STMT into reduction chain.  */
2515       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2516       if (current_stmt)
2517         {
2518           current_stmt_info = vinfo_for_stmt (current_stmt);
2519           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2520           GROUP_FIRST_ELEMENT (use_stmt_info)
2521             = GROUP_FIRST_ELEMENT (current_stmt_info);
2522         }
2523       else
2524         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2525
2526       lhs = gimple_assign_lhs (loop_use_stmt);
2527       current_stmt = loop_use_stmt;
2528       size++;
2529    }
2530
2531   if (!found || loop_use_stmt != phi || size < 2)
2532     return false;
2533
2534   /* Swap the operands, if needed, to make the reduction operand be the second
2535      operand.  */
2536   lhs = PHI_RESULT (phi);
2537   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2538   while (next_stmt)
2539     {
2540       if (gimple_assign_rhs2 (next_stmt) == lhs)
2541         {
2542           tree op = gimple_assign_rhs1 (next_stmt);
2543           gimple *def_stmt = NULL;
2544
2545           if (TREE_CODE (op) == SSA_NAME)
2546             def_stmt = SSA_NAME_DEF_STMT (op);
2547
2548           /* Check that the other def is either defined in the loop
2549              ("vect_internal_def"), or it's an induction (defined by a
2550              loop-header phi-node).  */
2551           if (def_stmt
2552               && gimple_bb (def_stmt)
2553               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2554               && (is_gimple_assign (def_stmt)
2555                   || is_gimple_call (def_stmt)
2556                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2557                            == vect_induction_def
2558                   || (gimple_code (def_stmt) == GIMPLE_PHI
2559                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2560                                   == vect_internal_def
2561                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2562             {
2563               lhs = gimple_assign_lhs (next_stmt);
2564               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2565               continue;
2566             }
2567
2568           return false;
2569         }
2570       else
2571         {
2572           tree op = gimple_assign_rhs2 (next_stmt);
2573           gimple *def_stmt = NULL;
2574
2575           if (TREE_CODE (op) == SSA_NAME)
2576             def_stmt = SSA_NAME_DEF_STMT (op);
2577
2578           /* Check that the other def is either defined in the loop
2579             ("vect_internal_def"), or it's an induction (defined by a
2580             loop-header phi-node).  */
2581           if (def_stmt
2582               && gimple_bb (def_stmt)
2583               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2584               && (is_gimple_assign (def_stmt)
2585                   || is_gimple_call (def_stmt)
2586                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2587                               == vect_induction_def
2588                   || (gimple_code (def_stmt) == GIMPLE_PHI
2589                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2590                                   == vect_internal_def
2591                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2592             {
2593               if (dump_enabled_p ())
2594                 {
2595                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2596                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2597                 }
2598
2599               swap_ssa_operands (next_stmt,
2600                                  gimple_assign_rhs1_ptr (next_stmt),
2601                                  gimple_assign_rhs2_ptr (next_stmt));
2602               update_stmt (next_stmt);
2603
2604               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2605                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2606             }
2607           else
2608             return false;
2609         }
2610
2611       lhs = gimple_assign_lhs (next_stmt);
2612       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2613     }
2614
2615   /* Save the chain for further analysis in SLP detection.  */
2616   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2617   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2618   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2619
2620   return true;
2621 }
2622
2623
2624 /* Function vect_is_simple_reduction_1
2625
2626    (1) Detect a cross-iteration def-use cycle that represents a simple
2627    reduction computation.  We look for the following pattern:
2628
2629    loop_header:
2630      a1 = phi < a0, a2 >
2631      a3 = ...
2632      a2 = operation (a3, a1)
2633
2634    or
2635
2636    a3 = ...
2637    loop_header:
2638      a1 = phi < a0, a2 >
2639      a2 = operation (a3, a1)
2640
2641    such that:
2642    1. operation is commutative and associative and it is safe to
2643       change the order of the computation (if CHECK_REDUCTION is true)
2644    2. no uses for a2 in the loop (a2 is used out of the loop)
2645    3. no uses of a1 in the loop besides the reduction operation
2646    4. no uses of a1 outside the loop.
2647
2648    Conditions 1,4 are tested here.
2649    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2650
2651    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2652    nested cycles, if CHECK_REDUCTION is false.
2653
2654    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2655    reductions:
2656
2657      a1 = phi < a0, a2 >
2658      inner loop (def of a3)
2659      a2 = phi < a3 >
2660
2661    (4) Detect condition expressions, ie:
2662      for (int i = 0; i < N; i++)
2663        if (a[i] < val)
2664         ret_val = a[i];
2665
2666 */
2667
2668 static gimple *
2669 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2670                           bool check_reduction, bool *double_reduc,
2671                           bool need_wrapping_integral_overflow,
2672                           enum vect_reduction_type *v_reduc_type)
2673 {
2674   struct loop *loop = (gimple_bb (phi))->loop_father;
2675   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2676   edge latch_e = loop_latch_edge (loop);
2677   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2678   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
2679   enum tree_code orig_code, code;
2680   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2681   tree type;
2682   int nloop_uses;
2683   tree name;
2684   imm_use_iterator imm_iter;
2685   use_operand_p use_p;
2686   bool phi_def;
2687
2688   *double_reduc = false;
2689   *v_reduc_type = TREE_CODE_REDUCTION;
2690
2691   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2692      otherwise, we assume outer loop vectorization.  */
2693   gcc_assert ((check_reduction && loop == vect_loop)
2694               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2695
2696   name = PHI_RESULT (phi);
2697   /* ???  If there are no uses of the PHI result the inner loop reduction
2698      won't be detected as possibly double-reduction by vectorizable_reduction
2699      because that tries to walk the PHI arg from the preheader edge which
2700      can be constant.  See PR60382.  */
2701   if (has_zero_uses (name))
2702     return NULL;
2703   nloop_uses = 0;
2704   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2705     {
2706       gimple *use_stmt = USE_STMT (use_p);
2707       if (is_gimple_debug (use_stmt))
2708         continue;
2709
2710       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2711         {
2712           if (dump_enabled_p ())
2713             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714                              "intermediate value used outside loop.\n");
2715
2716           return NULL;
2717         }
2718
2719       nloop_uses++;
2720       if (nloop_uses > 1)
2721         {
2722           if (dump_enabled_p ())
2723             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2724                              "reduction used in loop.\n");
2725           return NULL;
2726         }
2727
2728       phi_use_stmt = use_stmt;
2729     }
2730
2731   if (TREE_CODE (loop_arg) != SSA_NAME)
2732     {
2733       if (dump_enabled_p ())
2734         {
2735           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2736                            "reduction: not ssa_name: ");
2737           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2738           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2739         }
2740       return NULL;
2741     }
2742
2743   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2744   if (!def_stmt)
2745     {
2746       if (dump_enabled_p ())
2747         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2748                          "reduction: no def_stmt.\n");
2749       return NULL;
2750     }
2751
2752   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2753     {
2754       if (dump_enabled_p ())
2755         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2756       return NULL;
2757     }
2758
2759   if (is_gimple_assign (def_stmt))
2760     {
2761       name = gimple_assign_lhs (def_stmt);
2762       phi_def = false;
2763     }
2764   else
2765     {
2766       name = PHI_RESULT (def_stmt);
2767       phi_def = true;
2768     }
2769
2770   nloop_uses = 0;
2771   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2772     {
2773       gimple *use_stmt = USE_STMT (use_p);
2774       if (is_gimple_debug (use_stmt))
2775         continue;
2776       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2777         nloop_uses++;
2778       if (nloop_uses > 1)
2779         {
2780           if (dump_enabled_p ())
2781             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2782                              "reduction used in loop.\n");
2783           return NULL;
2784         }
2785     }
2786
2787   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2788      defined in the inner loop.  */
2789   if (phi_def)
2790     {
2791       op1 = PHI_ARG_DEF (def_stmt, 0);
2792
2793       if (gimple_phi_num_args (def_stmt) != 1
2794           || TREE_CODE (op1) != SSA_NAME)
2795         {
2796           if (dump_enabled_p ())
2797             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2798                              "unsupported phi node definition.\n");
2799
2800           return NULL;
2801         }
2802
2803       def1 = SSA_NAME_DEF_STMT (op1);
2804       if (gimple_bb (def1)
2805           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2806           && loop->inner
2807           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2808           && is_gimple_assign (def1)
2809           && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2810         {
2811           if (dump_enabled_p ())
2812             report_vect_op (MSG_NOTE, def_stmt,
2813                             "detected double reduction: ");
2814
2815           *double_reduc = true;
2816           return def_stmt;
2817         }
2818
2819       return NULL;
2820     }
2821
2822   code = orig_code = gimple_assign_rhs_code (def_stmt);
2823
2824   /* We can handle "res -= x[i]", which is non-associative by
2825      simply rewriting this into "res += -x[i]".  Avoid changing
2826      gimple instruction for the first simple tests and only do this
2827      if we're allowed to change code at all.  */
2828   if (code == MINUS_EXPR
2829       && (op1 = gimple_assign_rhs1 (def_stmt))
2830       && TREE_CODE (op1) == SSA_NAME
2831       && SSA_NAME_DEF_STMT (op1) == phi)
2832     code = PLUS_EXPR;
2833
2834   if (code == COND_EXPR)
2835     {
2836       if (check_reduction)
2837         *v_reduc_type = COND_REDUCTION;
2838     }
2839   else if (!commutative_tree_code (code) || !associative_tree_code (code))
2840     {
2841       if (dump_enabled_p ())
2842         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2843                         "reduction: not commutative/associative: ");
2844       return NULL;
2845     }
2846
2847   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2848     {
2849       if (code != COND_EXPR)
2850         {
2851           if (dump_enabled_p ())
2852             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2853                             "reduction: not binary operation: ");
2854
2855           return NULL;
2856         }
2857
2858       op3 = gimple_assign_rhs1 (def_stmt);
2859       if (COMPARISON_CLASS_P (op3))
2860         {
2861           op4 = TREE_OPERAND (op3, 1);
2862           op3 = TREE_OPERAND (op3, 0);
2863         }
2864
2865       op1 = gimple_assign_rhs2 (def_stmt);
2866       op2 = gimple_assign_rhs3 (def_stmt);
2867
2868       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2869         {
2870           if (dump_enabled_p ())
2871             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2872                             "reduction: uses not ssa_names: ");
2873
2874           return NULL;
2875         }
2876     }
2877   else
2878     {
2879       op1 = gimple_assign_rhs1 (def_stmt);
2880       op2 = gimple_assign_rhs2 (def_stmt);
2881
2882       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2883         {
2884           if (dump_enabled_p ())
2885             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2886                             "reduction: uses not ssa_names: ");
2887
2888           return NULL;
2889         }
2890    }
2891
2892   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2893   if ((TREE_CODE (op1) == SSA_NAME
2894        && !types_compatible_p (type,TREE_TYPE (op1)))
2895       || (TREE_CODE (op2) == SSA_NAME
2896           && !types_compatible_p (type, TREE_TYPE (op2)))
2897       || (op3 && TREE_CODE (op3) == SSA_NAME
2898           && !types_compatible_p (type, TREE_TYPE (op3)))
2899       || (op4 && TREE_CODE (op4) == SSA_NAME
2900           && !types_compatible_p (type, TREE_TYPE (op4))))
2901     {
2902       if (dump_enabled_p ())
2903         {
2904           dump_printf_loc (MSG_NOTE, vect_location,
2905                            "reduction: multiple types: operation type: ");
2906           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2907           dump_printf (MSG_NOTE, ", operands types: ");
2908           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2909                              TREE_TYPE (op1));
2910           dump_printf (MSG_NOTE, ",");
2911           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2912                              TREE_TYPE (op2));
2913           if (op3)
2914             {
2915               dump_printf (MSG_NOTE, ",");
2916               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2917                                  TREE_TYPE (op3));
2918             }
2919
2920           if (op4)
2921             {
2922               dump_printf (MSG_NOTE, ",");
2923               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2924                                  TREE_TYPE (op4));
2925             }
2926           dump_printf (MSG_NOTE, "\n");
2927         }
2928
2929       return NULL;
2930     }
2931
2932   /* Check that it's ok to change the order of the computation.
2933      Generally, when vectorizing a reduction we change the order of the
2934      computation.  This may change the behavior of the program in some
2935      cases, so we need to check that this is ok.  One exception is when
2936      vectorizing an outer-loop: the inner-loop is executed sequentially,
2937      and therefore vectorizing reductions in the inner-loop during
2938      outer-loop vectorization is safe.  */
2939
2940   if (*v_reduc_type != COND_REDUCTION
2941       && check_reduction)
2942     {
2943       /* CHECKME: check for !flag_finite_math_only too?  */
2944       if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math)
2945         {
2946           /* Changing the order of operations changes the semantics.  */
2947           if (dump_enabled_p ())
2948             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2949                         "reduction: unsafe fp math optimization: ");
2950           return NULL;
2951         }
2952       else if (INTEGRAL_TYPE_P (type))
2953         {
2954           if (!operation_no_trapping_overflow (type, code))
2955             {
2956               /* Changing the order of operations changes the semantics.  */
2957               if (dump_enabled_p ())
2958                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2959                                 "reduction: unsafe int math optimization"
2960                                 " (overflow traps): ");
2961               return NULL;
2962             }
2963           if (need_wrapping_integral_overflow
2964               && !TYPE_OVERFLOW_WRAPS (type)
2965               && operation_can_overflow (code))
2966             {
2967               /* Changing the order of operations changes the semantics.  */
2968               if (dump_enabled_p ())
2969                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2970                                 "reduction: unsafe int math optimization"
2971                                 " (overflow doesn't wrap): ");
2972               return NULL;
2973             }
2974         }
2975       else if (SAT_FIXED_POINT_TYPE_P (type))
2976         {
2977           /* Changing the order of operations changes the semantics.  */
2978           if (dump_enabled_p ())
2979           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2980                           "reduction: unsafe fixed-point math optimization: ");
2981           return NULL;
2982         }
2983     }
2984
2985   /* Reduction is safe. We're dealing with one of the following:
2986      1) integer arithmetic and no trapv
2987      2) floating point arithmetic, and special flags permit this optimization
2988      3) nested cycle (i.e., outer loop vectorization).  */
2989   if (TREE_CODE (op1) == SSA_NAME)
2990     def1 = SSA_NAME_DEF_STMT (op1);
2991
2992   if (TREE_CODE (op2) == SSA_NAME)
2993     def2 = SSA_NAME_DEF_STMT (op2);
2994
2995   if (code != COND_EXPR
2996       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2997     {
2998       if (dump_enabled_p ())
2999         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
3000       return NULL;
3001     }
3002
3003   /* Check that one def is the reduction def, defined by PHI,
3004      the other def is either defined in the loop ("vect_internal_def"),
3005      or it's an induction (defined by a loop-header phi-node).  */
3006
3007   if (def2 && def2 == phi
3008       && (code == COND_EXPR
3009           || !def1 || gimple_nop_p (def1)
3010           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
3011           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
3012               && (is_gimple_assign (def1)
3013                   || is_gimple_call (def1)
3014                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3015                       == vect_induction_def
3016                   || (gimple_code (def1) == GIMPLE_PHI
3017                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3018                           == vect_internal_def
3019                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
3020     {
3021       if (dump_enabled_p ())
3022         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3023       return def_stmt;
3024     }
3025
3026   if (def1 && def1 == phi
3027       && (code == COND_EXPR
3028           || !def2 || gimple_nop_p (def2)
3029           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
3030           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
3031               && (is_gimple_assign (def2)
3032                   || is_gimple_call (def2)
3033                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3034                        == vect_induction_def
3035                   || (gimple_code (def2) == GIMPLE_PHI
3036                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3037                            == vect_internal_def
3038                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
3039     {
3040       if (check_reduction && orig_code != MINUS_EXPR)
3041         {
3042           /* Check if we can swap operands (just for simplicity - so that
3043              the rest of the code can assume that the reduction variable
3044              is always the last (second) argument).  */
3045           if (code == COND_EXPR)
3046             {
3047               /* Swap cond_expr by inverting the condition.  */
3048               tree cond_expr = gimple_assign_rhs1 (def_stmt);
3049               enum tree_code invert_code = ERROR_MARK;
3050               enum tree_code cond_code = TREE_CODE (cond_expr);
3051
3052               if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
3053                 {
3054                   bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
3055                   invert_code = invert_tree_comparison (cond_code, honor_nans);
3056                 }
3057               if (invert_code != ERROR_MARK)
3058                 {
3059                   TREE_SET_CODE (cond_expr, invert_code);
3060                   swap_ssa_operands (def_stmt,
3061                                      gimple_assign_rhs2_ptr (def_stmt),
3062                                      gimple_assign_rhs3_ptr (def_stmt));
3063                 }
3064               else
3065                 {
3066                   if (dump_enabled_p ())
3067                     report_vect_op (MSG_NOTE, def_stmt,
3068                                     "detected reduction: cannot swap operands "
3069                                     "for cond_expr");
3070                   return NULL;
3071                 }
3072             }
3073           else
3074             swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
3075                                gimple_assign_rhs2_ptr (def_stmt));
3076
3077           if (dump_enabled_p ())
3078             report_vect_op (MSG_NOTE, def_stmt,
3079                             "detected reduction: need to swap operands: ");
3080
3081           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
3082             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
3083         }
3084       else
3085         {
3086           if (dump_enabled_p ())
3087             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3088         }
3089
3090       return def_stmt;
3091     }
3092
3093   /* Try to find SLP reduction chain.  */
3094   if (check_reduction && code != COND_EXPR
3095       && vect_is_slp_reduction (loop_info, phi, def_stmt))
3096     {
3097       if (dump_enabled_p ())
3098         report_vect_op (MSG_NOTE, def_stmt,
3099                         "reduction: detected reduction chain: ");
3100
3101       return def_stmt;
3102     }
3103
3104   if (dump_enabled_p ())
3105     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3106                     "reduction: unknown pattern: ");
3107
3108   return NULL;
3109 }
3110
3111 /* Wrapper around vect_is_simple_reduction_1, which will modify code
3112    in-place if it enables detection of more reductions.  Arguments
3113    as there.  */
3114
3115 gimple *
3116 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3117                              bool check_reduction, bool *double_reduc,
3118                              bool need_wrapping_integral_overflow)
3119 {
3120   enum vect_reduction_type v_reduc_type;
3121   return vect_is_simple_reduction (loop_info, phi, check_reduction,
3122                                    double_reduc,
3123                                    need_wrapping_integral_overflow,
3124                                    &v_reduc_type);
3125 }
3126
3127 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
3128 int
3129 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3130                              int *peel_iters_epilogue,
3131                              stmt_vector_for_cost *scalar_cost_vec,
3132                              stmt_vector_for_cost *prologue_cost_vec,
3133                              stmt_vector_for_cost *epilogue_cost_vec)
3134 {
3135   int retval = 0;
3136   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3137
3138   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3139     {
3140       *peel_iters_epilogue = vf/2;
3141       if (dump_enabled_p ())
3142         dump_printf_loc (MSG_NOTE, vect_location,
3143                          "cost model: epilogue peel iters set to vf/2 "
3144                          "because loop iterations are unknown .\n");
3145
3146       /* If peeled iterations are known but number of scalar loop
3147          iterations are unknown, count a taken branch per peeled loop.  */
3148       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3149                                  NULL, 0, vect_prologue);
3150       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3151                                  NULL, 0, vect_epilogue);
3152     }
3153   else
3154     {
3155       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3156       peel_iters_prologue = niters < peel_iters_prologue ?
3157                             niters : peel_iters_prologue;
3158       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
3159       /* If we need to peel for gaps, but no peeling is required, we have to
3160          peel VF iterations.  */
3161       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3162         *peel_iters_epilogue = vf;
3163     }
3164
3165   stmt_info_for_cost *si;
3166   int j;
3167   if (peel_iters_prologue)
3168     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3169       retval += record_stmt_cost (prologue_cost_vec,
3170                                   si->count * peel_iters_prologue,
3171                                   si->kind, NULL, si->misalign,
3172                                   vect_prologue);
3173   if (*peel_iters_epilogue)
3174     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3175       retval += record_stmt_cost (epilogue_cost_vec,
3176                                   si->count * *peel_iters_epilogue,
3177                                   si->kind, NULL, si->misalign,
3178                                   vect_epilogue);
3179
3180   return retval;
3181 }
3182
3183 /* Function vect_estimate_min_profitable_iters
3184
3185    Return the number of iterations required for the vector version of the
3186    loop to be profitable relative to the cost of the scalar version of the
3187    loop.
3188
3189    *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3190    of iterations for vectorization.  -1 value means loop vectorization
3191    is not profitable.  This returned value may be used for dynamic
3192    profitability check.
3193
3194    *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3195    for static check against estimated number of iterations.  */
3196
3197 static void
3198 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3199                                     int *ret_min_profitable_niters,
3200                                     int *ret_min_profitable_estimate)
3201 {
3202   int min_profitable_iters;
3203   int min_profitable_estimate;
3204   int peel_iters_prologue;
3205   int peel_iters_epilogue;
3206   unsigned vec_inside_cost = 0;
3207   int vec_outside_cost = 0;
3208   unsigned vec_prologue_cost = 0;
3209   unsigned vec_epilogue_cost = 0;
3210   int scalar_single_iter_cost = 0;
3211   int scalar_outside_cost = 0;
3212   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3213   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3214   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3215
3216   /* Cost model disabled.  */
3217   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3218     {
3219       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3220       *ret_min_profitable_niters = 0;
3221       *ret_min_profitable_estimate = 0;
3222       return;
3223     }
3224
3225   /* Requires loop versioning tests to handle misalignment.  */
3226   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3227     {
3228       /*  FIXME: Make cost depend on complexity of individual check.  */
3229       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3230       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3231                             vect_prologue);
3232       dump_printf (MSG_NOTE,
3233                    "cost model: Adding cost of checks for loop "
3234                    "versioning to treat misalignment.\n");
3235     }
3236
3237   /* Requires loop versioning with alias checks.  */
3238   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3239     {
3240       /*  FIXME: Make cost depend on complexity of individual check.  */
3241       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3242       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3243                             vect_prologue);
3244       dump_printf (MSG_NOTE,
3245                    "cost model: Adding cost of checks for loop "
3246                    "versioning aliasing.\n");
3247     }
3248
3249   /* Requires loop versioning with niter checks.  */
3250   if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3251     {
3252       /*  FIXME: Make cost depend on complexity of individual check.  */
3253       (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
3254                             vect_prologue);
3255       dump_printf (MSG_NOTE,
3256                    "cost model: Adding cost of checks for loop "
3257                    "versioning niters.\n");
3258     }
3259
3260   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3261     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3262                           vect_prologue);
3263
3264   /* Count statements in scalar loop.  Using this as scalar cost for a single
3265      iteration for now.
3266
3267      TODO: Add outer loop support.
3268
3269      TODO: Consider assigning different costs to different scalar
3270      statements.  */
3271
3272   scalar_single_iter_cost
3273     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3274
3275   /* Add additional cost for the peeled instructions in prologue and epilogue
3276      loop.
3277
3278      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3279      at compile-time - we assume it's vf/2 (the worst would be vf-1).
3280
3281      TODO: Build an expression that represents peel_iters for prologue and
3282      epilogue to be used in a run-time test.  */
3283
3284   if (npeel  < 0)
3285     {
3286       peel_iters_prologue = vf/2;
3287       dump_printf (MSG_NOTE, "cost model: "
3288                    "prologue peel iters set to vf/2.\n");
3289
3290       /* If peeling for alignment is unknown, loop bound of main loop becomes
3291          unknown.  */
3292       peel_iters_epilogue = vf/2;
3293       dump_printf (MSG_NOTE, "cost model: "
3294                    "epilogue peel iters set to vf/2 because "
3295                    "peeling for alignment is unknown.\n");
3296
3297       /* If peeled iterations are unknown, count a taken branch and a not taken
3298          branch per peeled loop. Even if scalar loop iterations are known,
3299          vector iterations are not known since peeled prologue iterations are
3300          not known. Hence guards remain the same.  */
3301       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3302                             NULL, 0, vect_prologue);
3303       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3304                             NULL, 0, vect_prologue);
3305       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3306                             NULL, 0, vect_epilogue);
3307       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3308                             NULL, 0, vect_epilogue);
3309       stmt_info_for_cost *si;
3310       int j;
3311       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3312         {
3313           struct _stmt_vec_info *stmt_info
3314             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3315           (void) add_stmt_cost (target_cost_data,
3316                                 si->count * peel_iters_prologue,
3317                                 si->kind, stmt_info, si->misalign,
3318                                 vect_prologue);
3319           (void) add_stmt_cost (target_cost_data,
3320                                 si->count * peel_iters_epilogue,
3321                                 si->kind, stmt_info, si->misalign,
3322                                 vect_epilogue);
3323         }
3324     }
3325   else
3326     {
3327       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3328       stmt_info_for_cost *si;
3329       int j;
3330       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3331
3332       prologue_cost_vec.create (2);
3333       epilogue_cost_vec.create (2);
3334       peel_iters_prologue = npeel;
3335
3336       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3337                                           &peel_iters_epilogue,
3338                                           &LOOP_VINFO_SCALAR_ITERATION_COST
3339                                             (loop_vinfo),
3340                                           &prologue_cost_vec,
3341                                           &epilogue_cost_vec);
3342
3343       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3344         {
3345           struct _stmt_vec_info *stmt_info
3346             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3347           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3348                                 si->misalign, vect_prologue);
3349         }
3350
3351       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3352         {
3353           struct _stmt_vec_info *stmt_info
3354             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3355           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3356                                 si->misalign, vect_epilogue);
3357         }
3358
3359       prologue_cost_vec.release ();
3360       epilogue_cost_vec.release ();
3361     }
3362
3363   /* FORNOW: The scalar outside cost is incremented in one of the
3364      following ways:
3365
3366      1. The vectorizer checks for alignment and aliasing and generates
3367      a condition that allows dynamic vectorization.  A cost model
3368      check is ANDED with the versioning condition.  Hence scalar code
3369      path now has the added cost of the versioning check.
3370
3371        if (cost > th & versioning_check)
3372          jmp to vector code
3373
3374      Hence run-time scalar is incremented by not-taken branch cost.
3375
3376      2. The vectorizer then checks if a prologue is required.  If the
3377      cost model check was not done before during versioning, it has to
3378      be done before the prologue check.
3379
3380        if (cost <= th)
3381          prologue = scalar_iters
3382        if (prologue == 0)
3383          jmp to vector code
3384        else
3385          execute prologue
3386        if (prologue == num_iters)
3387          go to exit
3388
3389      Hence the run-time scalar cost is incremented by a taken branch,
3390      plus a not-taken branch, plus a taken branch cost.
3391
3392      3. The vectorizer then checks if an epilogue is required.  If the
3393      cost model check was not done before during prologue check, it
3394      has to be done with the epilogue check.
3395
3396        if (prologue == 0)
3397          jmp to vector code
3398        else
3399          execute prologue
3400        if (prologue == num_iters)
3401          go to exit
3402        vector code:
3403          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3404            jmp to epilogue
3405
3406      Hence the run-time scalar cost should be incremented by 2 taken
3407      branches.
3408
3409      TODO: The back end may reorder the BBS's differently and reverse
3410      conditions/branch directions.  Change the estimates below to
3411      something more reasonable.  */
3412
3413   /* If the number of iterations is known and we do not do versioning, we can
3414      decide whether to vectorize at compile time.  Hence the scalar version
3415      do not carry cost model guard costs.  */
3416   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3417       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
3418     {
3419       /* Cost model check occurs at versioning.  */
3420       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3421         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3422       else
3423         {
3424           /* Cost model check occurs at prologue generation.  */
3425           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3426             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3427               + vect_get_stmt_cost (cond_branch_not_taken);
3428           /* Cost model check occurs at epilogue generation.  */
3429           else
3430             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3431         }
3432     }
3433
3434   /* Complete the target-specific cost calculations.  */
3435   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3436                &vec_inside_cost, &vec_epilogue_cost);
3437
3438   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3439
3440   if (dump_enabled_p ())
3441     {
3442       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3443       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3444                    vec_inside_cost);
3445       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3446                    vec_prologue_cost);
3447       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3448                    vec_epilogue_cost);
3449       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3450                    scalar_single_iter_cost);
3451       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3452                    scalar_outside_cost);
3453       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3454                    vec_outside_cost);
3455       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3456                    peel_iters_prologue);
3457       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3458                    peel_iters_epilogue);
3459     }
3460
3461   /* Calculate number of iterations required to make the vector version
3462      profitable, relative to the loop bodies only.  The following condition
3463      must hold true:
3464      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3465      where
3466      SIC = scalar iteration cost, VIC = vector iteration cost,
3467      VOC = vector outside cost, VF = vectorization factor,
3468      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3469      SOC = scalar outside cost for run time cost model check.  */
3470
3471   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3472     {
3473       if (vec_outside_cost <= 0)
3474         min_profitable_iters = 1;
3475       else
3476         {
3477           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3478                                   - vec_inside_cost * peel_iters_prologue
3479                                   - vec_inside_cost * peel_iters_epilogue)
3480                                  / ((scalar_single_iter_cost * vf)
3481                                     - vec_inside_cost);
3482
3483           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3484               <= (((int) vec_inside_cost * min_profitable_iters)
3485                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3486             min_profitable_iters++;
3487         }
3488     }
3489   /* vector version will never be profitable.  */
3490   else
3491     {
3492       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3493         warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3494                     "did not happen for a simd loop");
3495
3496       if (dump_enabled_p ())
3497         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3498                          "cost model: the vector iteration cost = %d "
3499                          "divided by the scalar iteration cost = %d "
3500                          "is greater or equal to the vectorization factor = %d"
3501                          ".\n",
3502                          vec_inside_cost, scalar_single_iter_cost, vf);
3503       *ret_min_profitable_niters = -1;
3504       *ret_min_profitable_estimate = -1;
3505       return;
3506     }
3507
3508   dump_printf (MSG_NOTE,
3509                "  Calculated minimum iters for profitability: %d\n",
3510                min_profitable_iters);
3511
3512   min_profitable_iters =
3513         min_profitable_iters < vf ? vf : min_profitable_iters;
3514
3515   /* Because the condition we create is:
3516      if (niters <= min_profitable_iters)
3517        then skip the vectorized loop.  */
3518   min_profitable_iters--;
3519
3520   if (dump_enabled_p ())
3521     dump_printf_loc (MSG_NOTE, vect_location,
3522                      "  Runtime profitability threshold = %d\n",
3523                      min_profitable_iters);
3524
3525   *ret_min_profitable_niters = min_profitable_iters;
3526
3527   /* Calculate number of iterations required to make the vector version
3528      profitable, relative to the loop bodies only.
3529
3530      Non-vectorized variant is SIC * niters and it must win over vector
3531      variant on the expected loop trip count.  The following condition must hold true:
3532      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3533
3534   if (vec_outside_cost <= 0)
3535     min_profitable_estimate = 1;
3536   else
3537     {
3538       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3539                                  - vec_inside_cost * peel_iters_prologue
3540                                  - vec_inside_cost * peel_iters_epilogue)
3541                                  / ((scalar_single_iter_cost * vf)
3542                                    - vec_inside_cost);
3543     }
3544   min_profitable_estimate --;
3545   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3546   if (dump_enabled_p ())
3547     dump_printf_loc (MSG_NOTE, vect_location,
3548                      "  Static estimate profitability threshold = %d\n",
3549                      min_profitable_estimate);
3550
3551   *ret_min_profitable_estimate = min_profitable_estimate;
3552 }
3553
3554 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3555    vector elements (not bits) for a vector of mode MODE.  */
3556 static void
3557 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3558                               unsigned char *sel)
3559 {
3560   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3561
3562   for (i = 0; i < nelt; i++)
3563     sel[i] = (i + offset) & (2*nelt - 1);
3564 }
3565
3566 /* Checks whether the target supports whole-vector shifts for vectors of mode
3567    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3568    it supports vec_perm_const with masks for all necessary shift amounts.  */
3569 static bool
3570 have_whole_vector_shift (enum machine_mode mode)
3571 {
3572   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3573     return true;
3574
3575   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3576     return false;
3577
3578   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3579   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3580
3581   for (i = nelt/2; i >= 1; i/=2)
3582     {
3583       calc_vec_perm_mask_for_shift (mode, i, sel);
3584       if (!can_vec_perm_p (mode, false, sel))
3585         return false;
3586     }
3587   return true;
3588 }
3589
3590 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3591
3592 static tree
3593 get_reduction_op (gimple *stmt, int reduc_index)
3594 {
3595   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3596     {
3597     case GIMPLE_SINGLE_RHS:
3598       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3599                   == ternary_op);
3600       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3601     case GIMPLE_UNARY_RHS:
3602       return gimple_assign_rhs1 (stmt);
3603     case GIMPLE_BINARY_RHS:
3604       return (reduc_index
3605               ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3606     case GIMPLE_TERNARY_RHS:
3607       return gimple_op (stmt, reduc_index + 1);
3608     default:
3609       gcc_unreachable ();
3610     }
3611 }
3612
3613 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3614    functions. Design better to avoid maintenance issues.  */
3615
3616 /* Function vect_model_reduction_cost.
3617
3618    Models cost for a reduction operation, including the vector ops
3619    generated within the strip-mine loop, the initial definition before
3620    the loop, and the epilogue code that must be generated.  */
3621
3622 static bool
3623 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3624                            int ncopies, int reduc_index)
3625 {
3626   int prologue_cost = 0, epilogue_cost = 0;
3627   enum tree_code code;
3628   optab optab;
3629   tree vectype;
3630   gimple *stmt, *orig_stmt;
3631   tree reduction_op;
3632   machine_mode mode;
3633   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3634   struct loop *loop = NULL;
3635   void *target_cost_data;
3636
3637   if (loop_vinfo)
3638     {
3639       loop = LOOP_VINFO_LOOP (loop_vinfo);
3640       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3641     }
3642   else
3643     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3644
3645   /* Condition reductions generate two reductions in the loop.  */
3646   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3647     ncopies *= 2;
3648
3649   /* Cost of reduction op inside loop.  */
3650   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3651                                         stmt_info, 0, vect_body);
3652   stmt = STMT_VINFO_STMT (stmt_info);
3653
3654   reduction_op = get_reduction_op (stmt, reduc_index);
3655
3656   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3657   if (!vectype)
3658     {
3659       if (dump_enabled_p ())
3660         {
3661           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3662                            "unsupported data-type ");
3663           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3664                              TREE_TYPE (reduction_op));
3665           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3666         }
3667       return false;
3668    }
3669
3670   mode = TYPE_MODE (vectype);
3671   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3672
3673   if (!orig_stmt)
3674     orig_stmt = STMT_VINFO_STMT (stmt_info);
3675
3676   code = gimple_assign_rhs_code (orig_stmt);
3677
3678   /* Add in cost for initial definition.
3679      For cond reduction we have four vectors: initial index, step, initial
3680      result of the data reduction, initial value of the index reduction.  */
3681   int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3682                        == COND_REDUCTION ? 4 : 1;
3683   prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3684                                   scalar_to_vec, stmt_info, 0,
3685                                   vect_prologue);
3686
3687   /* Determine cost of epilogue code.
3688
3689      We have a reduction operator that will reduce the vector in one statement.
3690      Also requires scalar extract.  */
3691
3692   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3693     {
3694       if (reduc_code != ERROR_MARK)
3695         {
3696           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3697             {
3698               /* An EQ stmt and an COND_EXPR stmt.  */
3699               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3700                                               vector_stmt, stmt_info, 0,
3701                                               vect_epilogue);
3702               /* Reduction of the max index and a reduction of the found
3703                  values.  */
3704               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3705                                               vec_to_scalar, stmt_info, 0,
3706                                               vect_epilogue);
3707               /* A broadcast of the max value.  */
3708               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3709                                               scalar_to_vec, stmt_info, 0,
3710                                               vect_epilogue);
3711             }
3712           else
3713             {
3714               epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3715                                               stmt_info, 0, vect_epilogue);
3716               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3717                                               vec_to_scalar, stmt_info, 0,
3718                                               vect_epilogue);
3719             }
3720         }
3721       else
3722         {
3723           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3724           tree bitsize =
3725             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3726           int element_bitsize = tree_to_uhwi (bitsize);
3727           int nelements = vec_size_in_bits / element_bitsize;
3728
3729           optab = optab_for_tree_code (code, vectype, optab_default);
3730
3731           /* We have a whole vector shift available.  */
3732           if (VECTOR_MODE_P (mode)
3733               && optab_handler (optab, mode) != CODE_FOR_nothing
3734               && have_whole_vector_shift (mode))
3735             {
3736               /* Final reduction via vector shifts and the reduction operator.
3737                  Also requires scalar extract.  */
3738               epilogue_cost += add_stmt_cost (target_cost_data,
3739                                               exact_log2 (nelements) * 2,
3740                                               vector_stmt, stmt_info, 0,
3741                                               vect_epilogue);
3742               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3743                                               vec_to_scalar, stmt_info, 0,
3744                                               vect_epilogue);
3745             }
3746           else
3747             /* Use extracts and reduction op for final reduction.  For N
3748                elements, we have N extracts and N-1 reduction ops.  */
3749             epilogue_cost += add_stmt_cost (target_cost_data,
3750                                             nelements + nelements - 1,
3751                                             vector_stmt, stmt_info, 0,
3752                                             vect_epilogue);
3753         }
3754     }
3755
3756   if (dump_enabled_p ())
3757     dump_printf (MSG_NOTE,
3758                  "vect_model_reduction_cost: inside_cost = %d, "
3759                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3760                  prologue_cost, epilogue_cost);
3761
3762   return true;
3763 }
3764
3765
3766 /* Function vect_model_induction_cost.
3767
3768    Models cost for induction operations.  */
3769
3770 static void
3771 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3772 {
3773   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3774   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3775   unsigned inside_cost, prologue_cost;
3776
3777   /* loop cost for vec_loop.  */
3778   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3779                                stmt_info, 0, vect_body);
3780
3781   /* prologue cost for vec_init and vec_step.  */
3782   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3783                                  stmt_info, 0, vect_prologue);
3784
3785   if (dump_enabled_p ())
3786     dump_printf_loc (MSG_NOTE, vect_location,
3787                      "vect_model_induction_cost: inside_cost = %d, "
3788                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3789 }
3790
3791
3792 /* Function get_initial_def_for_induction
3793
3794    Input:
3795    STMT - a stmt that performs an induction operation in the loop.
3796    IV_PHI - the initial value of the induction variable
3797
3798    Output:
3799    Return a vector variable, initialized with the first VF values of
3800    the induction variable.  E.g., for an iv with IV_PHI='X' and
3801    evolution S, for a vector of 4 units, we want to return:
3802    [X, X + S, X + 2*S, X + 3*S].  */
3803
3804 static tree
3805 get_initial_def_for_induction (gimple *iv_phi)
3806 {
3807   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3808   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3809   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3810   tree vectype;
3811   int nunits;
3812   edge pe = loop_preheader_edge (loop);
3813   struct loop *iv_loop;
3814   basic_block new_bb;
3815   tree new_vec, vec_init, vec_step, t;
3816   tree new_name;
3817   gimple *new_stmt;
3818   gphi *induction_phi;
3819   tree induc_def, vec_def, vec_dest;
3820   tree init_expr, step_expr;
3821   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3822   int i;
3823   int ncopies;
3824   tree expr;
3825   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3826   bool nested_in_vect_loop = false;
3827   gimple_seq stmts;
3828   imm_use_iterator imm_iter;
3829   use_operand_p use_p;
3830   gimple *exit_phi;
3831   edge latch_e;
3832   tree loop_arg;
3833   gimple_stmt_iterator si;
3834   basic_block bb = gimple_bb (iv_phi);
3835   tree stepvectype;
3836   tree resvectype;
3837
3838   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3839   if (nested_in_vect_loop_p (loop, iv_phi))
3840     {
3841       nested_in_vect_loop = true;
3842       iv_loop = loop->inner;
3843     }
3844   else
3845     iv_loop = loop;
3846   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3847
3848   latch_e = loop_latch_edge (iv_loop);
3849   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3850
3851   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3852   gcc_assert (step_expr != NULL_TREE);
3853
3854   pe = loop_preheader_edge (iv_loop);
3855   init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3856                                      loop_preheader_edge (iv_loop));
3857
3858   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3859   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3860   gcc_assert (vectype);
3861   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3862   ncopies = vf / nunits;
3863
3864   gcc_assert (phi_info);
3865   gcc_assert (ncopies >= 1);
3866
3867   /* Convert the step to the desired type.  */
3868   stmts = NULL;
3869   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
3870   if (stmts)
3871     {
3872       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3873       gcc_assert (!new_bb);
3874     }
3875
3876   /* Find the first insertion point in the BB.  */
3877   si = gsi_after_labels (bb);
3878
3879   /* Create the vector that holds the initial_value of the induction.  */
3880   if (nested_in_vect_loop)
3881     {
3882       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3883          been created during vectorization of previous stmts.  We obtain it
3884          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3885       vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi);
3886       /* If the initial value is not of proper type, convert it.  */
3887       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3888         {
3889           new_stmt
3890             = gimple_build_assign (vect_get_new_ssa_name (vectype,
3891                                                           vect_simple_var,
3892                                                           "vec_iv_"),
3893                                    VIEW_CONVERT_EXPR,
3894                                    build1 (VIEW_CONVERT_EXPR, vectype,
3895                                            vec_init));
3896           vec_init = gimple_assign_lhs (new_stmt);
3897           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3898                                                  new_stmt);
3899           gcc_assert (!new_bb);
3900           set_vinfo_for_stmt (new_stmt,
3901                               new_stmt_vec_info (new_stmt, loop_vinfo));
3902         }
3903     }
3904   else
3905     {
3906       vec<constructor_elt, va_gc> *v;
3907
3908       /* iv_loop is the loop to be vectorized. Create:
3909          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3910       stmts = NULL;
3911       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
3912
3913       vec_alloc (v, nunits);
3914       bool constant_p = is_gimple_min_invariant (new_name);
3915       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3916       for (i = 1; i < nunits; i++)
3917         {
3918           /* Create: new_name_i = new_name + step_expr  */
3919           new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
3920                                    new_name, step_expr);
3921           if (!is_gimple_min_invariant (new_name))
3922             constant_p = false;
3923           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3924         }
3925       if (stmts)
3926         {
3927           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3928           gcc_assert (!new_bb);
3929         }
3930
3931       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3932       if (constant_p)
3933         new_vec = build_vector_from_ctor (vectype, v);
3934       else
3935         new_vec = build_constructor (vectype, v);
3936       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3937     }
3938
3939
3940   /* Create the vector that holds the step of the induction.  */
3941   if (nested_in_vect_loop)
3942     /* iv_loop is nested in the loop to be vectorized. Generate:
3943        vec_step = [S, S, S, S]  */
3944     new_name = step_expr;
3945   else
3946     {
3947       /* iv_loop is the loop to be vectorized. Generate:
3948           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3949       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3950         {
3951           expr = build_int_cst (integer_type_node, vf);
3952           expr = fold_convert (TREE_TYPE (step_expr), expr);
3953         }
3954       else
3955         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3956       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3957                               expr, step_expr);
3958       if (TREE_CODE (step_expr) == SSA_NAME)
3959         new_name = vect_init_vector (iv_phi, new_name,
3960                                      TREE_TYPE (step_expr), NULL);
3961     }
3962
3963   t = unshare_expr (new_name);
3964   gcc_assert (CONSTANT_CLASS_P (new_name)
3965               || TREE_CODE (new_name) == SSA_NAME);
3966   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3967   gcc_assert (stepvectype);
3968   new_vec = build_vector_from_val (stepvectype, t);
3969   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3970
3971
3972   /* Create the following def-use cycle:
3973      loop prolog:
3974          vec_init = ...
3975          vec_step = ...
3976      loop:
3977          vec_iv = PHI <vec_init, vec_loop>
3978          ...
3979          STMT
3980          ...
3981          vec_loop = vec_iv + vec_step;  */
3982
3983   /* Create the induction-phi that defines the induction-operand.  */
3984   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3985   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3986   set_vinfo_for_stmt (induction_phi,
3987                       new_stmt_vec_info (induction_phi, loop_vinfo));
3988   induc_def = PHI_RESULT (induction_phi);
3989
3990   /* Create the iv update inside the loop  */
3991   new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3992   vec_def = make_ssa_name (vec_dest, new_stmt);
3993   gimple_assign_set_lhs (new_stmt, vec_def);
3994   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3995   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
3996
3997   /* Set the arguments of the phi node:  */
3998   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3999   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
4000                UNKNOWN_LOCATION);
4001
4002
4003   /* In case that vectorization factor (VF) is bigger than the number
4004      of elements that we can fit in a vectype (nunits), we have to generate
4005      more than one vector stmt - i.e - we need to "unroll" the
4006      vector stmt by a factor VF/nunits.  For more details see documentation
4007      in vectorizable_operation.  */
4008
4009   if (ncopies > 1)
4010     {
4011       stmt_vec_info prev_stmt_vinfo;
4012       /* FORNOW. This restriction should be relaxed.  */
4013       gcc_assert (!nested_in_vect_loop);
4014
4015       /* Create the vector that holds the step of the induction.  */
4016       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
4017         {
4018           expr = build_int_cst (integer_type_node, nunits);
4019           expr = fold_convert (TREE_TYPE (step_expr), expr);
4020         }
4021       else
4022         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
4023       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
4024                               expr, step_expr);
4025       if (TREE_CODE (step_expr) == SSA_NAME)
4026         new_name = vect_init_vector (iv_phi, new_name,
4027                                      TREE_TYPE (step_expr), NULL);
4028       t = unshare_expr (new_name);
4029       gcc_assert (CONSTANT_CLASS_P (new_name)
4030                   || TREE_CODE (new_name) == SSA_NAME);
4031       new_vec = build_vector_from_val (stepvectype, t);
4032       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
4033
4034       vec_def = induc_def;
4035       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
4036       for (i = 1; i < ncopies; i++)
4037         {
4038           /* vec_i = vec_prev + vec_step  */
4039           new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
4040                                           vec_def, vec_step);
4041           vec_def = make_ssa_name (vec_dest, new_stmt);
4042           gimple_assign_set_lhs (new_stmt, vec_def);
4043
4044           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4045           if (!useless_type_conversion_p (resvectype, vectype))
4046             {
4047               new_stmt
4048                 = gimple_build_assign
4049                         (vect_get_new_vect_var (resvectype, vect_simple_var,
4050                                                 "vec_iv_"),
4051                          VIEW_CONVERT_EXPR,
4052                          build1 (VIEW_CONVERT_EXPR, resvectype,
4053                                  gimple_assign_lhs (new_stmt)));
4054               gimple_assign_set_lhs (new_stmt,
4055                                      make_ssa_name
4056                                        (gimple_assign_lhs (new_stmt), new_stmt));
4057               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4058             }
4059           set_vinfo_for_stmt (new_stmt,
4060                               new_stmt_vec_info (new_stmt, loop_vinfo));
4061           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
4062           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
4063         }
4064     }
4065
4066   if (nested_in_vect_loop)
4067     {
4068       /* Find the loop-closed exit-phi of the induction, and record
4069          the final vector of induction results:  */
4070       exit_phi = NULL;
4071       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
4072         {
4073           gimple *use_stmt = USE_STMT (use_p);
4074           if (is_gimple_debug (use_stmt))
4075             continue;
4076
4077           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
4078             {
4079               exit_phi = use_stmt;
4080               break;
4081             }
4082         }
4083       if (exit_phi)
4084         {
4085           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
4086           /* FORNOW. Currently not supporting the case that an inner-loop induction
4087              is not used in the outer-loop (i.e. only outside the outer-loop).  */
4088           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
4089                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
4090
4091           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
4092           if (dump_enabled_p ())
4093             {
4094               dump_printf_loc (MSG_NOTE, vect_location,
4095                                "vector of inductions after inner-loop:");
4096               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
4097             }
4098         }
4099     }
4100
4101
4102   if (dump_enabled_p ())
4103     {
4104       dump_printf_loc (MSG_NOTE, vect_location,
4105                        "transform induction: created def-use cycle: ");
4106       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
4107       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
4108                         SSA_NAME_DEF_STMT (vec_def), 0);
4109     }
4110
4111   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
4112   if (!useless_type_conversion_p (resvectype, vectype))
4113     {
4114       new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
4115                                                              vect_simple_var,
4116                                                              "vec_iv_"),
4117                                       VIEW_CONVERT_EXPR,
4118                                       build1 (VIEW_CONVERT_EXPR, resvectype,
4119                                               induc_def));
4120       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
4121       gimple_assign_set_lhs (new_stmt, induc_def);
4122       si = gsi_after_labels (bb);
4123       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
4124       set_vinfo_for_stmt (new_stmt,
4125                           new_stmt_vec_info (new_stmt, loop_vinfo));
4126       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
4127         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
4128     }
4129
4130   return induc_def;
4131 }
4132
4133
4134 /* Function get_initial_def_for_reduction
4135
4136    Input:
4137    STMT - a stmt that performs a reduction operation in the loop.
4138    INIT_VAL - the initial value of the reduction variable
4139
4140    Output:
4141    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
4142         of the reduction (used for adjusting the epilog - see below).
4143    Return a vector variable, initialized according to the operation that STMT
4144         performs. This vector will be used as the initial value of the
4145         vector of partial results.
4146
4147    Option1 (adjust in epilog): Initialize the vector as follows:
4148      add/bit or/xor:    [0,0,...,0,0]
4149      mult/bit and:      [1,1,...,1,1]
4150      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
4151    and when necessary (e.g. add/mult case) let the caller know
4152    that it needs to adjust the result by init_val.
4153
4154    Option2: Initialize the vector as follows:
4155      add/bit or/xor:    [init_val,0,0,...,0]
4156      mult/bit and:      [init_val,1,1,...,1]
4157      min/max/cond_expr: [init_val,init_val,...,init_val]
4158    and no adjustments are needed.
4159
4160    For example, for the following code:
4161
4162    s = init_val;
4163    for (i=0;i<n;i++)
4164      s = s + a[i];
4165
4166    STMT is 's = s + a[i]', and the reduction variable is 's'.
4167    For a vector of 4 units, we want to return either [0,0,0,init_val],
4168    or [0,0,0,0] and let the caller know that it needs to adjust
4169    the result at the end by 'init_val'.
4170
4171    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
4172    initialization vector is simpler (same element in all entries), if
4173    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
4174
4175    A cost model should help decide between these two schemes.  */
4176
4177 tree
4178 get_initial_def_for_reduction (gimple *stmt, tree init_val,
4179                                tree *adjustment_def)
4180 {
4181   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
4182   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4183   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4184   tree scalar_type = TREE_TYPE (init_val);
4185   tree vectype = get_vectype_for_scalar_type (scalar_type);
4186   int nunits;
4187   enum tree_code code = gimple_assign_rhs_code (stmt);
4188   tree def_for_init;
4189   tree init_def;
4190   tree *elts;
4191   int i;
4192   bool nested_in_vect_loop = false;
4193   REAL_VALUE_TYPE real_init_val = dconst0;
4194   int int_init_val = 0;
4195   gimple *def_stmt = NULL;
4196   gimple_seq stmts = NULL;
4197
4198   gcc_assert (vectype);
4199   nunits = TYPE_VECTOR_SUBPARTS (vectype);
4200
4201   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
4202               || SCALAR_FLOAT_TYPE_P (scalar_type));
4203
4204   if (nested_in_vect_loop_p (loop, stmt))
4205     nested_in_vect_loop = true;
4206   else
4207     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
4208
4209   /* In case of double reduction we only create a vector variable to be put
4210      in the reduction phi node.  The actual statement creation is done in
4211      vect_create_epilog_for_reduction.  */
4212   if (adjustment_def && nested_in_vect_loop
4213       && TREE_CODE (init_val) == SSA_NAME
4214       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
4215       && gimple_code (def_stmt) == GIMPLE_PHI
4216       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4217       && vinfo_for_stmt (def_stmt)
4218       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4219           == vect_double_reduction_def)
4220     {
4221       *adjustment_def = NULL;
4222       return vect_create_destination_var (init_val, vectype);
4223     }
4224
4225   /* In case of a nested reduction do not use an adjustment def as
4226      that case is not supported by the epilogue generation correctly
4227      if ncopies is not one.  */
4228   if (adjustment_def && nested_in_vect_loop)
4229     {
4230       *adjustment_def = NULL;
4231       return vect_get_vec_def_for_operand (init_val, stmt);
4232     }
4233
4234   switch (code)
4235     {
4236       case WIDEN_SUM_EXPR:
4237       case DOT_PROD_EXPR:
4238       case SAD_EXPR:
4239       case PLUS_EXPR:
4240       case MINUS_EXPR:
4241       case BIT_IOR_EXPR:
4242       case BIT_XOR_EXPR:
4243       case MULT_EXPR:
4244       case BIT_AND_EXPR:
4245         /* ADJUSMENT_DEF is NULL when called from
4246            vect_create_epilog_for_reduction to vectorize double reduction.  */
4247         if (adjustment_def)
4248           *adjustment_def = init_val;
4249
4250         if (code == MULT_EXPR)
4251           {
4252             real_init_val = dconst1;
4253             int_init_val = 1;
4254           }
4255
4256         if (code == BIT_AND_EXPR)
4257           int_init_val = -1;
4258
4259         if (SCALAR_FLOAT_TYPE_P (scalar_type))
4260           def_for_init = build_real (scalar_type, real_init_val);
4261         else
4262           def_for_init = build_int_cst (scalar_type, int_init_val);
4263
4264         /* Create a vector of '0' or '1' except the first element.  */
4265         elts = XALLOCAVEC (tree, nunits);
4266         for (i = nunits - 2; i >= 0; --i)
4267           elts[i + 1] = def_for_init;
4268
4269         /* Option1: the first element is '0' or '1' as well.  */
4270         if (adjustment_def)
4271           {
4272             elts[0] = def_for_init;
4273             init_def = build_vector (vectype, elts);
4274             break;
4275           }
4276
4277         /* Option2: the first element is INIT_VAL.  */
4278         elts[0] = init_val;
4279         if (TREE_CONSTANT (init_val))
4280           init_def = build_vector (vectype, elts);
4281         else
4282           {
4283             vec<constructor_elt, va_gc> *v;
4284             vec_alloc (v, nunits);
4285             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4286             for (i = 1; i < nunits; ++i)
4287               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4288             init_def = build_constructor (vectype, v);
4289           }
4290
4291         break;
4292
4293       case MIN_EXPR:
4294       case MAX_EXPR:
4295       case COND_EXPR:
4296         if (adjustment_def)
4297           {
4298             *adjustment_def = NULL_TREE;
4299             if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4300               {
4301                 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4302                 break;
4303               }
4304           }
4305         init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4306         if (! gimple_seq_empty_p (stmts))
4307           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4308         init_def = build_vector_from_val (vectype, init_val);
4309         break;
4310
4311       default:
4312         gcc_unreachable ();
4313     }
4314
4315   return init_def;
4316 }
4317
4318 /* Function vect_create_epilog_for_reduction
4319
4320    Create code at the loop-epilog to finalize the result of a reduction
4321    computation.
4322
4323    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4324      reduction statements.
4325    STMT is the scalar reduction stmt that is being vectorized.
4326    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4327      number of elements that we can fit in a vectype (nunits).  In this case
4328      we have to generate more than one vector stmt - i.e - we need to "unroll"
4329      the vector stmt by a factor VF/nunits.  For more details see documentation
4330      in vectorizable_operation.
4331    REDUC_CODE is the tree-code for the epilog reduction.
4332    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4333      computation.
4334    REDUC_INDEX is the index of the operand in the right hand side of the
4335      statement that is defined by REDUCTION_PHI.
4336    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4337    SLP_NODE is an SLP node containing a group of reduction statements. The
4338      first one in this group is STMT.
4339    INDUCTION_INDEX is the index of the loop for condition reductions.
4340      Otherwise it is undefined.
4341
4342    This function:
4343    1. Creates the reduction def-use cycles: sets the arguments for
4344       REDUCTION_PHIS:
4345       The loop-entry argument is the vectorized initial-value of the reduction.
4346       The loop-latch argument is taken from VECT_DEFS - the vector of partial
4347       sums.
4348    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4349       by applying the operation specified by REDUC_CODE if available, or by
4350       other means (whole-vector shifts or a scalar loop).
4351       The function also creates a new phi node at the loop exit to preserve
4352       loop-closed form, as illustrated below.
4353
4354      The flow at the entry to this function:
4355
4356         loop:
4357           vec_def = phi <null, null>            # REDUCTION_PHI
4358           VECT_DEF = vector_stmt                # vectorized form of STMT
4359           s_loop = scalar_stmt                  # (scalar) STMT
4360         loop_exit:
4361           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4362           use <s_out0>
4363           use <s_out0>
4364
4365      The above is transformed by this function into:
4366
4367         loop:
4368           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4369           VECT_DEF = vector_stmt                # vectorized form of STMT
4370           s_loop = scalar_stmt                  # (scalar) STMT
4371         loop_exit:
4372           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4373           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4374           v_out2 = reduce <v_out1>
4375           s_out3 = extract_field <v_out2, 0>
4376           s_out4 = adjust_result <s_out3>
4377           use <s_out4>
4378           use <s_out4>
4379 */
4380
4381 static void
4382 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4383                                   int ncopies, enum tree_code reduc_code,
4384                                   vec<gimple *> reduction_phis,
4385                                   int reduc_index, bool double_reduc,
4386                                   slp_tree slp_node, tree induction_index)
4387 {
4388   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4389   stmt_vec_info prev_phi_info;
4390   tree vectype;
4391   machine_mode mode;
4392   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4393   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4394   basic_block exit_bb;
4395   tree scalar_dest;
4396   tree scalar_type;
4397   gimple *new_phi = NULL, *phi;
4398   gimple_stmt_iterator exit_gsi;
4399   tree vec_dest;
4400   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4401   gimple *epilog_stmt = NULL;
4402   enum tree_code code = gimple_assign_rhs_code (stmt);
4403   gimple *exit_phi;
4404   tree bitsize;
4405   tree adjustment_def = NULL;
4406   tree vec_initial_def = NULL;
4407   tree reduction_op, expr, def, initial_def = NULL;
4408   tree orig_name, scalar_result;
4409   imm_use_iterator imm_iter, phi_imm_iter;
4410   use_operand_p use_p, phi_use_p;
4411   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4412   bool nested_in_vect_loop = false;
4413   auto_vec<gimple *> new_phis;
4414   auto_vec<gimple *> inner_phis;
4415   enum vect_def_type dt = vect_unknown_def_type;
4416   int j, i;
4417   auto_vec<tree> scalar_results;
4418   unsigned int group_size = 1, k, ratio;
4419   auto_vec<tree> vec_initial_defs;
4420   auto_vec<gimple *> phis;
4421   bool slp_reduc = false;
4422   tree new_phi_result;
4423   gimple *inner_phi = NULL;
4424
4425   if (slp_node)
4426     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4427
4428   if (nested_in_vect_loop_p (loop, stmt))
4429     {
4430       outer_loop = loop;
4431       loop = loop->inner;
4432       nested_in_vect_loop = true;
4433       gcc_assert (!slp_node);
4434     }
4435
4436   reduction_op = get_reduction_op (stmt, reduc_index);
4437
4438   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4439   gcc_assert (vectype);
4440   mode = TYPE_MODE (vectype);
4441
4442   /* 1. Create the reduction def-use cycle:
4443      Set the arguments of REDUCTION_PHIS, i.e., transform
4444
4445         loop:
4446           vec_def = phi <null, null>            # REDUCTION_PHI
4447           VECT_DEF = vector_stmt                # vectorized form of STMT
4448           ...
4449
4450      into:
4451
4452         loop:
4453           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4454           VECT_DEF = vector_stmt                # vectorized form of STMT
4455           ...
4456
4457      (in case of SLP, do it for all the phis). */
4458
4459   /* Get the loop-entry arguments.  */
4460   enum vect_def_type initial_def_dt = vect_unknown_def_type;
4461   if (slp_node)
4462     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4463                        NULL, slp_node, reduc_index);
4464   else
4465     {
4466       /* Get at the scalar def before the loop, that defines the initial value
4467          of the reduction variable.  */
4468       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4469       initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4470                                            loop_preheader_edge (loop));
4471       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
4472       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4473                                                        &adjustment_def);
4474       vec_initial_defs.create (1);
4475       vec_initial_defs.quick_push (vec_initial_def);
4476     }
4477
4478   /* Set phi nodes arguments.  */
4479   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4480     {
4481       tree vec_init_def, def;
4482       gimple_seq stmts;
4483       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4484                                            true, NULL_TREE);
4485       if (stmts)
4486         gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4487
4488       def = vect_defs[i];
4489       for (j = 0; j < ncopies; j++)
4490         {
4491           if (j != 0)
4492             {
4493               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4494               if (nested_in_vect_loop)
4495                 vec_init_def
4496                   = vect_get_vec_def_for_stmt_copy (initial_def_dt,
4497                                                     vec_init_def);
4498             }
4499
4500           /* Set the loop-entry arg of the reduction-phi.  */
4501
4502           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4503               == INTEGER_INDUC_COND_REDUCTION)
4504             {
4505               /* Initialise the reduction phi to zero.  This prevents initial
4506                  values of non-zero interferring with the reduction op.  */
4507               gcc_assert (ncopies == 1);
4508               gcc_assert (i == 0);
4509
4510               tree vec_init_def_type = TREE_TYPE (vec_init_def);
4511               tree zero_vec = build_zero_cst (vec_init_def_type);
4512
4513               add_phi_arg (as_a <gphi *> (phi), zero_vec,
4514                            loop_preheader_edge (loop), UNKNOWN_LOCATION);
4515             }
4516           else
4517             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4518                          loop_preheader_edge (loop), UNKNOWN_LOCATION);
4519
4520           /* Set the loop-latch arg for the reduction-phi.  */
4521           if (j > 0)
4522             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4523
4524           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4525                        UNKNOWN_LOCATION);
4526
4527           if (dump_enabled_p ())
4528             {
4529               dump_printf_loc (MSG_NOTE, vect_location,
4530                                "transform reduction: created def-use cycle: ");
4531               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4532               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4533             }
4534         }
4535     }
4536
4537   /* 2. Create epilog code.
4538         The reduction epilog code operates across the elements of the vector
4539         of partial results computed by the vectorized loop.
4540         The reduction epilog code consists of:
4541
4542         step 1: compute the scalar result in a vector (v_out2)
4543         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4544         step 3: adjust the scalar result (s_out3) if needed.
4545
4546         Step 1 can be accomplished using one the following three schemes:
4547           (scheme 1) using reduc_code, if available.
4548           (scheme 2) using whole-vector shifts, if available.
4549           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4550                      combined.
4551
4552           The overall epilog code looks like this:
4553
4554           s_out0 = phi <s_loop>         # original EXIT_PHI
4555           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4556           v_out2 = reduce <v_out1>              # step 1
4557           s_out3 = extract_field <v_out2, 0>    # step 2
4558           s_out4 = adjust_result <s_out3>       # step 3
4559
4560           (step 3 is optional, and steps 1 and 2 may be combined).
4561           Lastly, the uses of s_out0 are replaced by s_out4.  */
4562
4563
4564   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4565          v_out1 = phi <VECT_DEF>
4566          Store them in NEW_PHIS.  */
4567
4568   exit_bb = single_exit (loop)->dest;
4569   prev_phi_info = NULL;
4570   new_phis.create (vect_defs.length ());
4571   FOR_EACH_VEC_ELT (vect_defs, i, def)
4572     {
4573       for (j = 0; j < ncopies; j++)
4574         {
4575           tree new_def = copy_ssa_name (def);
4576           phi = create_phi_node (new_def, exit_bb);
4577           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4578           if (j == 0)
4579             new_phis.quick_push (phi);
4580           else
4581             {
4582               def = vect_get_vec_def_for_stmt_copy (dt, def);
4583               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4584             }
4585
4586           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4587           prev_phi_info = vinfo_for_stmt (phi);
4588         }
4589     }
4590
4591   /* The epilogue is created for the outer-loop, i.e., for the loop being
4592      vectorized.  Create exit phis for the outer loop.  */
4593   if (double_reduc)
4594     {
4595       loop = outer_loop;
4596       exit_bb = single_exit (loop)->dest;
4597       inner_phis.create (vect_defs.length ());
4598       FOR_EACH_VEC_ELT (new_phis, i, phi)
4599         {
4600           tree new_result = copy_ssa_name (PHI_RESULT (phi));
4601           gphi *outer_phi = create_phi_node (new_result, exit_bb);
4602           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4603                            PHI_RESULT (phi));
4604           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4605                                                             loop_vinfo));
4606           inner_phis.quick_push (phi);
4607           new_phis[i] = outer_phi;
4608           prev_phi_info = vinfo_for_stmt (outer_phi);
4609           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4610             {
4611               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4612               new_result = copy_ssa_name (PHI_RESULT (phi));
4613               outer_phi = create_phi_node (new_result, exit_bb);
4614               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4615                                PHI_RESULT (phi));
4616               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4617                                                                 loop_vinfo));
4618               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4619               prev_phi_info = vinfo_for_stmt (outer_phi);
4620             }
4621         }
4622     }
4623
4624   exit_gsi = gsi_after_labels (exit_bb);
4625
4626   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4627          (i.e. when reduc_code is not available) and in the final adjustment
4628          code (if needed).  Also get the original scalar reduction variable as
4629          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4630          represents a reduction pattern), the tree-code and scalar-def are
4631          taken from the original stmt that the pattern-stmt (STMT) replaces.
4632          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4633          are taken from STMT.  */
4634
4635   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4636   if (!orig_stmt)
4637     {
4638       /* Regular reduction  */
4639       orig_stmt = stmt;
4640     }
4641   else
4642     {
4643       /* Reduction pattern  */
4644       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4645       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4646       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4647     }
4648
4649   code = gimple_assign_rhs_code (orig_stmt);
4650   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4651      partial results are added and not subtracted.  */
4652   if (code == MINUS_EXPR)
4653     code = PLUS_EXPR;
4654
4655   scalar_dest = gimple_assign_lhs (orig_stmt);
4656   scalar_type = TREE_TYPE (scalar_dest);
4657   scalar_results.create (group_size);
4658   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4659   bitsize = TYPE_SIZE (scalar_type);
4660
4661   /* In case this is a reduction in an inner-loop while vectorizing an outer
4662      loop - we don't need to extract a single scalar result at the end of the
4663      inner-loop (unless it is double reduction, i.e., the use of reduction is
4664      outside the outer-loop).  The final vector of partial results will be used
4665      in the vectorized outer-loop, or reduced to a scalar result at the end of
4666      the outer-loop.  */
4667   if (nested_in_vect_loop && !double_reduc)
4668     goto vect_finalize_reduction;
4669
4670   /* SLP reduction without reduction chain, e.g.,
4671      # a1 = phi <a2, a0>
4672      # b1 = phi <b2, b0>
4673      a2 = operation (a1)
4674      b2 = operation (b1)  */
4675   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4676
4677   /* In case of reduction chain, e.g.,
4678      # a1 = phi <a3, a0>
4679      a2 = operation (a1)
4680      a3 = operation (a2),
4681
4682      we may end up with more than one vector result.  Here we reduce them to
4683      one vector.  */
4684   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4685     {
4686       tree first_vect = PHI_RESULT (new_phis[0]);
4687       tree tmp;
4688       gassign *new_vec_stmt = NULL;
4689
4690       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4691       for (k = 1; k < new_phis.length (); k++)
4692         {
4693           gimple *next_phi = new_phis[k];
4694           tree second_vect = PHI_RESULT (next_phi);
4695
4696           tmp = build2 (code, vectype,  first_vect, second_vect);
4697           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4698           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4699           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4700           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4701         }
4702
4703       new_phi_result = first_vect;
4704       if (new_vec_stmt)
4705         {
4706           new_phis.truncate (0);
4707           new_phis.safe_push (new_vec_stmt);
4708         }
4709     }
4710   else
4711     new_phi_result = PHI_RESULT (new_phis[0]);
4712
4713   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4714     {
4715       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4716          various data values where the condition matched and another vector
4717          (INDUCTION_INDEX) containing all the indexes of those matches.  We
4718          need to extract the last matching index (which will be the index with
4719          highest value) and use this to index into the data vector.
4720          For the case where there were no matches, the data vector will contain
4721          all default values and the index vector will be all zeros.  */
4722
4723       /* Get various versions of the type of the vector of indexes.  */
4724       tree index_vec_type = TREE_TYPE (induction_index);
4725       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4726       tree index_scalar_type = TREE_TYPE (index_vec_type);
4727       tree index_vec_cmp_type = build_same_sized_truth_vector_type
4728         (index_vec_type);
4729
4730       /* Get an unsigned integer version of the type of the data vector.  */
4731       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4732       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4733       tree vectype_unsigned = build_vector_type
4734         (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4735
4736       /* First we need to create a vector (ZERO_VEC) of zeros and another
4737          vector (MAX_INDEX_VEC) filled with the last matching index, which we
4738          can create using a MAX reduction and then expanding.
4739          In the case where the loop never made any matches, the max index will
4740          be zero.  */
4741
4742       /* Vector of {0, 0, 0,...}.  */
4743       tree zero_vec = make_ssa_name (vectype);
4744       tree zero_vec_rhs = build_zero_cst (vectype);
4745       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4746       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4747
4748       /* Find maximum value from the vector of found indexes.  */
4749       tree max_index = make_ssa_name (index_scalar_type);
4750       gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4751                                                     induction_index);
4752       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4753
4754       /* Vector of {max_index, max_index, max_index,...}.  */
4755       tree max_index_vec = make_ssa_name (index_vec_type);
4756       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4757                                                       max_index);
4758       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4759                                                         max_index_vec_rhs);
4760       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4761
4762       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4763          with the vector (INDUCTION_INDEX) of found indexes, choosing values
4764          from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4765          otherwise.  Only one value should match, resulting in a vector
4766          (VEC_COND) with one data value and the rest zeros.
4767          In the case where the loop never made any matches, every index will
4768          match, resulting in a vector with all data values (which will all be
4769          the default value).  */
4770
4771       /* Compare the max index vector to the vector of found indexes to find
4772          the position of the max value.  */
4773       tree vec_compare = make_ssa_name (index_vec_cmp_type);
4774       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4775                                                       induction_index,
4776                                                       max_index_vec);
4777       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4778
4779       /* Use the compare to choose either values from the data vector or
4780          zero.  */
4781       tree vec_cond = make_ssa_name (vectype);
4782       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4783                                                    vec_compare, new_phi_result,
4784                                                    zero_vec);
4785       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4786
4787       /* Finally we need to extract the data value from the vector (VEC_COND)
4788          into a scalar (MATCHED_DATA_REDUC).  Logically we want to do a OR
4789          reduction, but because this doesn't exist, we can use a MAX reduction
4790          instead.  The data value might be signed or a float so we need to cast
4791          it first.
4792          In the case where the loop never made any matches, the data values are
4793          all identical, and so will reduce down correctly.  */
4794
4795       /* Make the matched data values unsigned.  */
4796       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4797       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4798                                        vec_cond);
4799       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4800                                                         VIEW_CONVERT_EXPR,
4801                                                         vec_cond_cast_rhs);
4802       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4803
4804       /* Reduce down to a scalar value.  */
4805       tree data_reduc = make_ssa_name (scalar_type_unsigned);
4806       optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4807                                       optab_default);
4808       gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4809                   != CODE_FOR_nothing);
4810       gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4811                                                      REDUC_MAX_EXPR,
4812                                                      vec_cond_cast);
4813       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4814
4815       /* Convert the reduced value back to the result type and set as the
4816          result.  */
4817       tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4818                                      data_reduc);
4819       epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4820       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4821       gimple_assign_set_lhs (epilog_stmt, new_temp);
4822       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4823       scalar_results.safe_push (new_temp);
4824     }
4825
4826   /* 2.3 Create the reduction code, using one of the three schemes described
4827          above. In SLP we simply need to extract all the elements from the
4828          vector (without reducing them), so we use scalar shifts.  */
4829   else if (reduc_code != ERROR_MARK && !slp_reduc)
4830     {
4831       tree tmp;
4832       tree vec_elem_type;
4833
4834       /*** Case 1:  Create:
4835            v_out2 = reduc_expr <v_out1>  */
4836
4837       if (dump_enabled_p ())
4838         dump_printf_loc (MSG_NOTE, vect_location,
4839                          "Reduce using direct vector reduction.\n");
4840
4841       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4842       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4843         {
4844           tree tmp_dest =
4845               vect_create_destination_var (scalar_dest, vec_elem_type);
4846           tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4847           epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4848           new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4849           gimple_assign_set_lhs (epilog_stmt, new_temp);
4850           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4851
4852           tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4853         }
4854       else
4855         tmp = build1 (reduc_code, scalar_type, new_phi_result);
4856
4857       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4858       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4859       gimple_assign_set_lhs (epilog_stmt, new_temp);
4860       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4861
4862       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4863           == INTEGER_INDUC_COND_REDUCTION)
4864         {
4865           /* Earlier we set the initial value to be zero.  Check the result
4866              and if it is zero then replace with the original initial
4867              value.  */
4868           tree zero = build_zero_cst (scalar_type);
4869           tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4870
4871           tmp = make_ssa_name (new_scalar_dest);
4872           epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4873                                              initial_def, new_temp);
4874           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4875           new_temp = tmp;
4876         }
4877
4878       scalar_results.safe_push (new_temp);
4879     }
4880   else
4881     {
4882       bool reduce_with_shift = have_whole_vector_shift (mode);
4883       int element_bitsize = tree_to_uhwi (bitsize);
4884       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4885       tree vec_temp;
4886
4887       /* Regardless of whether we have a whole vector shift, if we're
4888          emulating the operation via tree-vect-generic, we don't want
4889          to use it.  Only the first round of the reduction is likely
4890          to still be profitable via emulation.  */
4891       /* ??? It might be better to emit a reduction tree code here, so that
4892          tree-vect-generic can expand the first round via bit tricks.  */
4893       if (!VECTOR_MODE_P (mode))
4894         reduce_with_shift = false;
4895       else
4896         {
4897           optab optab = optab_for_tree_code (code, vectype, optab_default);
4898           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4899             reduce_with_shift = false;
4900         }
4901
4902       if (reduce_with_shift && !slp_reduc)
4903         {
4904           int nelements = vec_size_in_bits / element_bitsize;
4905           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4906
4907           int elt_offset;
4908
4909           tree zero_vec = build_zero_cst (vectype);
4910           /*** Case 2: Create:
4911              for (offset = nelements/2; offset >= 1; offset/=2)
4912                 {
4913                   Create:  va' = vec_shift <va, offset>
4914                   Create:  va = vop <va, va'>
4915                 }  */
4916
4917           tree rhs;
4918
4919           if (dump_enabled_p ())
4920             dump_printf_loc (MSG_NOTE, vect_location,
4921                              "Reduce using vector shifts\n");
4922
4923           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4924           new_temp = new_phi_result;
4925           for (elt_offset = nelements / 2;
4926                elt_offset >= 1;
4927                elt_offset /= 2)
4928             {
4929               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4930               tree mask = vect_gen_perm_mask_any (vectype, sel);
4931               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4932                                                  new_temp, zero_vec, mask);
4933               new_name = make_ssa_name (vec_dest, epilog_stmt);
4934               gimple_assign_set_lhs (epilog_stmt, new_name);
4935               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4936
4937               epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4938                                                  new_temp);
4939               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4940               gimple_assign_set_lhs (epilog_stmt, new_temp);
4941               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4942             }
4943
4944           /* 2.4  Extract the final scalar result.  Create:
4945              s_out3 = extract_field <v_out2, bitpos>  */
4946
4947           if (dump_enabled_p ())
4948             dump_printf_loc (MSG_NOTE, vect_location,
4949                              "extract scalar result\n");
4950
4951           rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4952                         bitsize, bitsize_zero_node);
4953           epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4954           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4955           gimple_assign_set_lhs (epilog_stmt, new_temp);
4956           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4957           scalar_results.safe_push (new_temp);
4958         }
4959       else
4960         {
4961           /*** Case 3: Create:
4962              s = extract_field <v_out2, 0>
4963              for (offset = element_size;
4964                   offset < vector_size;
4965                   offset += element_size;)
4966                {
4967                  Create:  s' = extract_field <v_out2, offset>
4968                  Create:  s = op <s, s'>  // For non SLP cases
4969                }  */
4970
4971           if (dump_enabled_p ())
4972             dump_printf_loc (MSG_NOTE, vect_location,
4973                              "Reduce using scalar code.\n");
4974
4975           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4976           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4977             {
4978               int bit_offset;
4979               if (gimple_code (new_phi) == GIMPLE_PHI)
4980                 vec_temp = PHI_RESULT (new_phi);
4981               else
4982                 vec_temp = gimple_assign_lhs (new_phi);
4983               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4984                             bitsize_zero_node);
4985               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4986               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4987               gimple_assign_set_lhs (epilog_stmt, new_temp);
4988               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4989
4990               /* In SLP we don't need to apply reduction operation, so we just
4991                  collect s' values in SCALAR_RESULTS.  */
4992               if (slp_reduc)
4993                 scalar_results.safe_push (new_temp);
4994
4995               for (bit_offset = element_bitsize;
4996                    bit_offset < vec_size_in_bits;
4997                    bit_offset += element_bitsize)
4998                 {
4999                   tree bitpos = bitsize_int (bit_offset);
5000                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
5001                                      bitsize, bitpos);
5002
5003                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5004                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
5005                   gimple_assign_set_lhs (epilog_stmt, new_name);
5006                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5007
5008                   if (slp_reduc)
5009                     {
5010                       /* In SLP we don't need to apply reduction operation, so
5011                          we just collect s' values in SCALAR_RESULTS.  */
5012                       new_temp = new_name;
5013                       scalar_results.safe_push (new_name);
5014                     }
5015                   else
5016                     {
5017                       epilog_stmt = gimple_build_assign (new_scalar_dest, code,
5018                                                          new_name, new_temp);
5019                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5020                       gimple_assign_set_lhs (epilog_stmt, new_temp);
5021                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5022                     }
5023                 }
5024             }
5025
5026           /* The only case where we need to reduce scalar results in SLP, is
5027              unrolling.  If the size of SCALAR_RESULTS is greater than
5028              GROUP_SIZE, we reduce them combining elements modulo
5029              GROUP_SIZE.  */
5030           if (slp_reduc)
5031             {
5032               tree res, first_res, new_res;
5033               gimple *new_stmt;
5034
5035               /* Reduce multiple scalar results in case of SLP unrolling.  */
5036               for (j = group_size; scalar_results.iterate (j, &res);
5037                    j++)
5038                 {
5039                   first_res = scalar_results[j % group_size];
5040                   new_stmt = gimple_build_assign (new_scalar_dest, code,
5041                                                   first_res, res);
5042                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
5043                   gimple_assign_set_lhs (new_stmt, new_res);
5044                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
5045                   scalar_results[j % group_size] = new_res;
5046                 }
5047             }
5048           else
5049             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
5050             scalar_results.safe_push (new_temp);
5051         }
5052     }
5053
5054 vect_finalize_reduction:
5055
5056   if (double_reduc)
5057     loop = loop->inner;
5058
5059   /* 2.5 Adjust the final result by the initial value of the reduction
5060          variable. (When such adjustment is not needed, then
5061          'adjustment_def' is zero).  For example, if code is PLUS we create:
5062          new_temp = loop_exit_def + adjustment_def  */
5063
5064   if (adjustment_def)
5065     {
5066       gcc_assert (!slp_reduc);
5067       if (nested_in_vect_loop)
5068         {
5069           new_phi = new_phis[0];
5070           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
5071           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
5072           new_dest = vect_create_destination_var (scalar_dest, vectype);
5073         }
5074       else
5075         {
5076           new_temp = scalar_results[0];
5077           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
5078           expr = build2 (code, scalar_type, new_temp, adjustment_def);
5079           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
5080         }
5081
5082       epilog_stmt = gimple_build_assign (new_dest, expr);
5083       new_temp = make_ssa_name (new_dest, epilog_stmt);
5084       gimple_assign_set_lhs (epilog_stmt, new_temp);
5085       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5086       if (nested_in_vect_loop)
5087         {
5088           set_vinfo_for_stmt (epilog_stmt,
5089                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
5090           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
5091                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
5092
5093           if (!double_reduc)
5094             scalar_results.quick_push (new_temp);
5095           else
5096             scalar_results[0] = new_temp;
5097         }
5098       else
5099         scalar_results[0] = new_temp;
5100
5101       new_phis[0] = epilog_stmt;
5102     }
5103
5104   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
5105           phis with new adjusted scalar results, i.e., replace use <s_out0>
5106           with use <s_out4>.
5107
5108      Transform:
5109         loop_exit:
5110           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5111           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5112           v_out2 = reduce <v_out1>
5113           s_out3 = extract_field <v_out2, 0>
5114           s_out4 = adjust_result <s_out3>
5115           use <s_out0>
5116           use <s_out0>
5117
5118      into:
5119
5120         loop_exit:
5121           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
5122           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
5123           v_out2 = reduce <v_out1>
5124           s_out3 = extract_field <v_out2, 0>
5125           s_out4 = adjust_result <s_out3>
5126           use <s_out4>
5127           use <s_out4> */
5128
5129
5130   /* In SLP reduction chain we reduce vector results into one vector if
5131      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
5132      the last stmt in the reduction chain, since we are looking for the loop
5133      exit phi node.  */
5134   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
5135     {
5136       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5137       /* Handle reduction patterns.  */
5138       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
5139         dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5140
5141       scalar_dest = gimple_assign_lhs (dest_stmt);
5142       group_size = 1;
5143     }
5144
5145   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5146      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
5147      need to match SCALAR_RESULTS with corresponding statements.  The first
5148      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
5149      the first vector stmt, etc.
5150      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
5151   if (group_size > new_phis.length ())
5152     {
5153       ratio = group_size / new_phis.length ();
5154       gcc_assert (!(group_size % new_phis.length ()));
5155     }
5156   else
5157     ratio = 1;
5158
5159   for (k = 0; k < group_size; k++)
5160     {
5161       if (k % ratio == 0)
5162         {
5163           epilog_stmt = new_phis[k / ratio];
5164           reduction_phi = reduction_phis[k / ratio];
5165           if (double_reduc)
5166             inner_phi = inner_phis[k / ratio];
5167         }
5168
5169       if (slp_reduc)
5170         {
5171           gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5172
5173           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
5174           /* SLP statements can't participate in patterns.  */
5175           gcc_assert (!orig_stmt);
5176           scalar_dest = gimple_assign_lhs (current_stmt);
5177         }
5178
5179       phis.create (3);
5180       /* Find the loop-closed-use at the loop exit of the original scalar
5181          result.  (The reduction result is expected to have two immediate uses -
5182          one at the latch block, and one at the loop exit).  */
5183       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5184         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
5185             && !is_gimple_debug (USE_STMT (use_p)))
5186           phis.safe_push (USE_STMT (use_p));
5187
5188       /* While we expect to have found an exit_phi because of loop-closed-ssa
5189          form we can end up without one if the scalar cycle is dead.  */
5190
5191       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5192         {
5193           if (outer_loop)
5194             {
5195               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5196               gphi *vect_phi;
5197
5198               /* FORNOW. Currently not supporting the case that an inner-loop
5199                  reduction is not used in the outer-loop (but only outside the
5200                  outer-loop), unless it is double reduction.  */
5201               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5202                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
5203                           || double_reduc);
5204
5205               if (double_reduc)
5206                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5207               else
5208                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
5209               if (!double_reduc
5210                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5211                       != vect_double_reduction_def)
5212                 continue;
5213
5214               /* Handle double reduction:
5215
5216                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
5217                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
5218                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
5219                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
5220
5221                  At that point the regular reduction (stmt2 and stmt3) is
5222                  already vectorized, as well as the exit phi node, stmt4.
5223                  Here we vectorize the phi node of double reduction, stmt1, and
5224                  update all relevant statements.  */
5225
5226               /* Go through all the uses of s2 to find double reduction phi
5227                  node, i.e., stmt1 above.  */
5228               orig_name = PHI_RESULT (exit_phi);
5229               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5230                 {
5231                   stmt_vec_info use_stmt_vinfo;
5232                   stmt_vec_info new_phi_vinfo;
5233                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
5234                   basic_block bb = gimple_bb (use_stmt);
5235                   gimple *use;
5236
5237                   /* Check that USE_STMT is really double reduction phi
5238                      node.  */
5239                   if (gimple_code (use_stmt) != GIMPLE_PHI
5240                       || gimple_phi_num_args (use_stmt) != 2
5241                       || bb->loop_father != outer_loop)
5242                     continue;
5243                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
5244                   if (!use_stmt_vinfo
5245                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5246                           != vect_double_reduction_def)
5247                     continue;
5248
5249                   /* Create vector phi node for double reduction:
5250                      vs1 = phi <vs0, vs2>
5251                      vs1 was created previously in this function by a call to
5252                        vect_get_vec_def_for_operand and is stored in
5253                        vec_initial_def;
5254                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5255                      vs0 is created here.  */
5256
5257                   /* Create vector phi node.  */
5258                   vect_phi = create_phi_node (vec_initial_def, bb);
5259                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
5260                                     loop_vec_info_for_loop (outer_loop));
5261                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5262
5263                   /* Create vs0 - initial def of the double reduction phi.  */
5264                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5265                                              loop_preheader_edge (outer_loop));
5266                   init_def = get_initial_def_for_reduction (stmt,
5267                                                           preheader_arg, NULL);
5268                   vect_phi_init = vect_init_vector (use_stmt, init_def,
5269                                                     vectype, NULL);
5270
5271                   /* Update phi node arguments with vs0 and vs2.  */
5272                   add_phi_arg (vect_phi, vect_phi_init,
5273                                loop_preheader_edge (outer_loop),
5274                                UNKNOWN_LOCATION);
5275                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5276                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5277                   if (dump_enabled_p ())
5278                     {
5279                       dump_printf_loc (MSG_NOTE, vect_location,
5280                                        "created double reduction phi node: ");
5281                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5282                     }
5283
5284                   vect_phi_res = PHI_RESULT (vect_phi);
5285
5286                   /* Replace the use, i.e., set the correct vs1 in the regular
5287                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
5288                      loop is redundant.  */
5289                   use = reduction_phi;
5290                   for (j = 0; j < ncopies; j++)
5291                     {
5292                       edge pr_edge = loop_preheader_edge (loop);
5293                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5294                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5295                     }
5296                 }
5297             }
5298         }
5299
5300       phis.release ();
5301       if (nested_in_vect_loop)
5302         {
5303           if (double_reduc)
5304             loop = outer_loop;
5305           else
5306             continue;
5307         }
5308
5309       phis.create (3);
5310       /* Find the loop-closed-use at the loop exit of the original scalar
5311          result.  (The reduction result is expected to have two immediate uses,
5312          one at the latch block, and one at the loop exit).  For double
5313          reductions we are looking for exit phis of the outer loop.  */
5314       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5315         {
5316           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5317             {
5318               if (!is_gimple_debug (USE_STMT (use_p)))
5319                 phis.safe_push (USE_STMT (use_p));
5320             }
5321           else
5322             {
5323               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5324                 {
5325                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
5326
5327                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5328                     {
5329                       if (!flow_bb_inside_loop_p (loop,
5330                                              gimple_bb (USE_STMT (phi_use_p)))
5331                           && !is_gimple_debug (USE_STMT (phi_use_p)))
5332                         phis.safe_push (USE_STMT (phi_use_p));
5333                     }
5334                 }
5335             }
5336         }
5337
5338       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5339         {
5340           /* Replace the uses:  */
5341           orig_name = PHI_RESULT (exit_phi);
5342           scalar_result = scalar_results[k];
5343           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5344             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5345               SET_USE (use_p, scalar_result);
5346         }
5347
5348       phis.release ();
5349     }
5350 }
5351
5352
5353 /* Function is_nonwrapping_integer_induction.
5354
5355    Check if STMT (which is part of loop LOOP) both increments and
5356    does not cause overflow.  */
5357
5358 static bool
5359 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5360 {
5361   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5362   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5363   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5364   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5365   widest_int ni, max_loop_value, lhs_max;
5366   bool overflow = false;
5367
5368   /* Make sure the loop is integer based.  */
5369   if (TREE_CODE (base) != INTEGER_CST
5370       || TREE_CODE (step) != INTEGER_CST)
5371     return false;
5372
5373   /* Check that the induction increments.  */
5374   if (tree_int_cst_sgn (step) == -1)
5375     return false;
5376
5377   /* Check that the max size of the loop will not wrap.  */
5378
5379   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5380     return true;
5381
5382   if (! max_stmt_executions (loop, &ni))
5383     return false;
5384
5385   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5386                             &overflow);
5387   if (overflow)
5388     return false;
5389
5390   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5391                             TYPE_SIGN (lhs_type), &overflow);
5392   if (overflow)
5393     return false;
5394
5395   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5396           <= TYPE_PRECISION (lhs_type));
5397 }
5398
5399 /* Function vectorizable_reduction.
5400
5401    Check if STMT performs a reduction operation that can be vectorized.
5402    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5403    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5404    Return FALSE if not a vectorizable STMT, TRUE otherwise.
5405
5406    This function also handles reduction idioms (patterns) that have been
5407    recognized in advance during vect_pattern_recog.  In this case, STMT may be
5408    of this form:
5409      X = pattern_expr (arg0, arg1, ..., X)
5410    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
5411    sequence that had been detected and replaced by the pattern-stmt (STMT).
5412
5413    This function also handles reduction of condition expressions, for example:
5414      for (int i = 0; i < N; i++)
5415        if (a[i] < value)
5416          last = a[i];
5417    This is handled by vectorising the loop and creating an additional vector
5418    containing the loop indexes for which "a[i] < value" was true.  In the
5419    function epilogue this is reduced to a single max value and then used to
5420    index into the vector of results.
5421
5422    In some cases of reduction patterns, the type of the reduction variable X is
5423    different than the type of the other arguments of STMT.
5424    In such cases, the vectype that is used when transforming STMT into a vector
5425    stmt is different than the vectype that is used to determine the
5426    vectorization factor, because it consists of a different number of elements
5427    than the actual number of elements that are being operated upon in parallel.
5428
5429    For example, consider an accumulation of shorts into an int accumulator.
5430    On some targets it's possible to vectorize this pattern operating on 8
5431    shorts at a time (hence, the vectype for purposes of determining the
5432    vectorization factor should be V8HI); on the other hand, the vectype that
5433    is used to create the vector form is actually V4SI (the type of the result).
5434
5435    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5436    indicates what is the actual level of parallelism (V8HI in the example), so
5437    that the right vectorization factor would be derived.  This vectype
5438    corresponds to the type of arguments to the reduction stmt, and should *NOT*
5439    be used to create the vectorized stmt.  The right vectype for the vectorized
5440    stmt is obtained from the type of the result X:
5441         get_vectype_for_scalar_type (TREE_TYPE (X))
5442
5443    This means that, contrary to "regular" reductions (or "regular" stmts in
5444    general), the following equation:
5445       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5446    does *NOT* necessarily hold for reduction patterns.  */
5447
5448 bool
5449 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5450                         gimple **vec_stmt, slp_tree slp_node)
5451 {
5452   tree vec_dest;
5453   tree scalar_dest;
5454   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5455   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5456   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5457   tree vectype_in = NULL_TREE;
5458   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5459   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5460   enum tree_code code, orig_code, epilog_reduc_code;
5461   machine_mode vec_mode;
5462   int op_type;
5463   optab optab, reduc_optab;
5464   tree new_temp = NULL_TREE;
5465   gimple *def_stmt;
5466   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
5467   gphi *new_phi = NULL;
5468   tree scalar_type;
5469   bool is_simple_use;
5470   gimple *orig_stmt;
5471   stmt_vec_info orig_stmt_info;
5472   tree expr = NULL_TREE;
5473   int i;
5474   int ncopies;
5475   int epilog_copies;
5476   stmt_vec_info prev_stmt_info, prev_phi_info;
5477   bool single_defuse_cycle = false;
5478   tree reduc_def = NULL_TREE;
5479   gimple *new_stmt = NULL;
5480   int j;
5481   tree ops[3];
5482   bool nested_cycle = false, found_nested_cycle_def = false;
5483   gimple *reduc_def_stmt = NULL;
5484   bool double_reduc = false, dummy;
5485   basic_block def_bb;
5486   struct loop * def_stmt_loop, *outer_loop = NULL;
5487   tree def_arg;
5488   gimple *def_arg_stmt;
5489   auto_vec<tree> vec_oprnds0;
5490   auto_vec<tree> vec_oprnds1;
5491   auto_vec<tree> vect_defs;
5492   auto_vec<gimple *> phis;
5493   int vec_num;
5494   tree def0, def1, tem, op1 = NULL_TREE;
5495   bool first_p = true;
5496   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5497   tree cond_reduc_val = NULL_TREE;
5498
5499   /* In case of reduction chain we switch to the first stmt in the chain, but
5500      we don't update STMT_INFO, since only the last stmt is marked as reduction
5501      and has reduction properties.  */
5502   if (GROUP_FIRST_ELEMENT (stmt_info)
5503       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5504     {
5505       stmt = GROUP_FIRST_ELEMENT (stmt_info);
5506       first_p = false;
5507     }
5508
5509   if (nested_in_vect_loop_p (loop, stmt))
5510     {
5511       outer_loop = loop;
5512       loop = loop->inner;
5513       nested_cycle = true;
5514     }
5515
5516   /* 1. Is vectorizable reduction?  */
5517   /* Not supportable if the reduction variable is used in the loop, unless
5518      it's a reduction chain.  */
5519   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5520       && !GROUP_FIRST_ELEMENT (stmt_info))
5521     return false;
5522
5523   /* Reductions that are not used even in an enclosing outer-loop,
5524      are expected to be "live" (used out of the loop).  */
5525   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5526       && !STMT_VINFO_LIVE_P (stmt_info))
5527     return false;
5528
5529   /* Make sure it was already recognized as a reduction computation.  */
5530   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5531       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5532     return false;
5533
5534   /* 2. Has this been recognized as a reduction pattern?
5535
5536      Check if STMT represents a pattern that has been recognized
5537      in earlier analysis stages.  For stmts that represent a pattern,
5538      the STMT_VINFO_RELATED_STMT field records the last stmt in
5539      the original sequence that constitutes the pattern.  */
5540
5541   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5542   if (orig_stmt)
5543     {
5544       orig_stmt_info = vinfo_for_stmt (orig_stmt);
5545       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5546       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5547     }
5548
5549   /* 3. Check the operands of the operation.  The first operands are defined
5550         inside the loop body. The last operand is the reduction variable,
5551         which is defined by the loop-header-phi.  */
5552
5553   gcc_assert (is_gimple_assign (stmt));
5554
5555   /* Flatten RHS.  */
5556   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5557     {
5558     case GIMPLE_SINGLE_RHS:
5559       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5560       if (op_type == ternary_op)
5561         {
5562           tree rhs = gimple_assign_rhs1 (stmt);
5563           ops[0] = TREE_OPERAND (rhs, 0);
5564           ops[1] = TREE_OPERAND (rhs, 1);
5565           ops[2] = TREE_OPERAND (rhs, 2);
5566           code = TREE_CODE (rhs);
5567         }
5568       else
5569         return false;
5570       break;
5571
5572     case GIMPLE_BINARY_RHS:
5573       code = gimple_assign_rhs_code (stmt);
5574       op_type = TREE_CODE_LENGTH (code);
5575       gcc_assert (op_type == binary_op);
5576       ops[0] = gimple_assign_rhs1 (stmt);
5577       ops[1] = gimple_assign_rhs2 (stmt);
5578       break;
5579
5580     case GIMPLE_TERNARY_RHS:
5581       code = gimple_assign_rhs_code (stmt);
5582       op_type = TREE_CODE_LENGTH (code);
5583       gcc_assert (op_type == ternary_op);
5584       ops[0] = gimple_assign_rhs1 (stmt);
5585       ops[1] = gimple_assign_rhs2 (stmt);
5586       ops[2] = gimple_assign_rhs3 (stmt);
5587       break;
5588
5589     case GIMPLE_UNARY_RHS:
5590       return false;
5591
5592     default:
5593       gcc_unreachable ();
5594     }
5595   /* The default is that the reduction variable is the last in statement.  */
5596   int reduc_index = op_type - 1;
5597   if (code == MINUS_EXPR)
5598     reduc_index = 0;
5599
5600   if (code == COND_EXPR && slp_node)
5601     return false;
5602
5603   scalar_dest = gimple_assign_lhs (stmt);
5604   scalar_type = TREE_TYPE (scalar_dest);
5605   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5606       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5607     return false;
5608
5609   /* Do not try to vectorize bit-precision reductions.  */
5610   if ((TYPE_PRECISION (scalar_type)
5611        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5612     return false;
5613
5614   /* All uses but the last are expected to be defined in the loop.
5615      The last use is the reduction variable.  In case of nested cycle this
5616      assumption is not true: we use reduc_index to record the index of the
5617      reduction variable.  */
5618   for (i = 0; i < op_type; i++)
5619     {
5620       if (i == reduc_index)
5621         continue;
5622
5623       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5624       if (i == 0 && code == COND_EXPR)
5625         continue;
5626
5627       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5628                                           &def_stmt, &dt, &tem);
5629       if (!vectype_in)
5630         vectype_in = tem;
5631       gcc_assert (is_simple_use);
5632
5633       if (dt != vect_internal_def
5634           && dt != vect_external_def
5635           && dt != vect_constant_def
5636           && dt != vect_induction_def
5637           && !(dt == vect_nested_cycle && nested_cycle))
5638         return false;
5639
5640       if (dt == vect_nested_cycle)
5641         {
5642           found_nested_cycle_def = true;
5643           reduc_def_stmt = def_stmt;
5644           reduc_index = i;
5645         }
5646
5647       if (i == 1 && code == COND_EXPR)
5648         {
5649           /* Record how value of COND_EXPR is defined.  */
5650           if (dt == vect_constant_def)
5651             {
5652               cond_reduc_dt = dt;
5653               cond_reduc_val = ops[i];
5654             }
5655           if (dt == vect_induction_def && def_stmt != NULL
5656               && is_nonwrapping_integer_induction (def_stmt, loop))
5657             cond_reduc_dt = dt;
5658         }
5659     }
5660
5661   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5662                                       &def_stmt, &dt, &tem);
5663   if (!vectype_in)
5664     vectype_in = tem;
5665   gcc_assert (is_simple_use);
5666   if (!found_nested_cycle_def)
5667     reduc_def_stmt = def_stmt;
5668
5669   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5670     return false;
5671
5672   if (!(dt == vect_reduction_def
5673         || dt == vect_nested_cycle
5674         || ((dt == vect_internal_def || dt == vect_external_def
5675              || dt == vect_constant_def || dt == vect_induction_def)
5676             && nested_cycle && found_nested_cycle_def)))
5677     {
5678       /* For pattern recognized stmts, orig_stmt might be a reduction,
5679          but some helper statements for the pattern might not, or
5680          might be COND_EXPRs with reduction uses in the condition.  */
5681       gcc_assert (orig_stmt);
5682       return false;
5683     }
5684
5685   enum vect_reduction_type v_reduc_type;
5686   gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5687                                           !nested_cycle, &dummy, false,
5688                                           &v_reduc_type);
5689
5690   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5691   /* If we have a condition reduction, see if we can simplify it further.  */
5692   if (v_reduc_type == COND_REDUCTION)
5693     {
5694       if (cond_reduc_dt == vect_induction_def)
5695         {
5696           if (dump_enabled_p ())
5697             dump_printf_loc (MSG_NOTE, vect_location,
5698                              "condition expression based on "
5699                              "integer induction.\n");
5700           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5701             = INTEGER_INDUC_COND_REDUCTION;
5702         }
5703
5704       /* Loop peeling modifies initial value of reduction PHI, which
5705          makes the reduction stmt to be transformed different to the
5706          original stmt analyzed.  We need to record reduction code for
5707          CONST_COND_REDUCTION type reduction at analyzing stage, thus
5708          it can be used directly at transform stage.  */
5709       if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
5710           || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
5711         {
5712           /* Also set the reduction type to CONST_COND_REDUCTION.  */
5713           gcc_assert (cond_reduc_dt == vect_constant_def);
5714           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
5715         }
5716       else if (cond_reduc_dt == vect_constant_def)
5717         {
5718           enum vect_def_type cond_initial_dt;
5719           gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
5720           tree cond_initial_val
5721             = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
5722
5723           gcc_assert (cond_reduc_val != NULL_TREE);
5724           vect_is_simple_use (cond_initial_val, loop_vinfo,
5725                               &def_stmt, &cond_initial_dt);
5726           if (cond_initial_dt == vect_constant_def
5727               && types_compatible_p (TREE_TYPE (cond_initial_val),
5728                                      TREE_TYPE (cond_reduc_val)))
5729             {
5730               tree e = fold_build2 (LE_EXPR, boolean_type_node,
5731                                     cond_initial_val, cond_reduc_val);
5732               if (e && (integer_onep (e) || integer_zerop (e)))
5733                 {
5734                   if (dump_enabled_p ())
5735                     dump_printf_loc (MSG_NOTE, vect_location,
5736                                      "condition expression based on "
5737                                      "compile time constant.\n");
5738                   /* Record reduction code at analysis stage.  */
5739                   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
5740                     = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
5741                   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5742                     = CONST_COND_REDUCTION;
5743                 }
5744             }
5745         }
5746     }
5747
5748   if (orig_stmt)
5749     gcc_assert (tmp == orig_stmt
5750                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5751   else
5752     /* We changed STMT to be the first stmt in reduction chain, hence we
5753        check that in this case the first element in the chain is STMT.  */
5754     gcc_assert (stmt == tmp
5755                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5756
5757   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5758     return false;
5759
5760   if (slp_node)
5761     ncopies = 1;
5762   else
5763     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5764                / TYPE_VECTOR_SUBPARTS (vectype_in));
5765
5766   gcc_assert (ncopies >= 1);
5767
5768   vec_mode = TYPE_MODE (vectype_in);
5769
5770   if (code == COND_EXPR)
5771     {
5772       /* Only call during the analysis stage, otherwise we'll lose
5773          STMT_VINFO_TYPE.  */
5774       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5775                                                 ops[reduc_index], 0, NULL))
5776         {
5777           if (dump_enabled_p ())
5778             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5779                              "unsupported condition in reduction\n");
5780           return false;
5781         }
5782     }
5783   else
5784     {
5785       /* 4. Supportable by target?  */
5786
5787       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5788           || code == LROTATE_EXPR || code == RROTATE_EXPR)
5789         {
5790           /* Shifts and rotates are only supported by vectorizable_shifts,
5791              not vectorizable_reduction.  */
5792           if (dump_enabled_p ())
5793             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794                              "unsupported shift or rotation.\n");
5795           return false;
5796         }
5797
5798       /* 4.1. check support for the operation in the loop  */
5799       optab = optab_for_tree_code (code, vectype_in, optab_default);
5800       if (!optab)
5801         {
5802           if (dump_enabled_p ())
5803             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804                              "no optab.\n");
5805
5806           return false;
5807         }
5808
5809       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5810         {
5811           if (dump_enabled_p ())
5812             dump_printf (MSG_NOTE, "op not supported by target.\n");
5813
5814           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5815               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5816                   < vect_min_worthwhile_factor (code))
5817             return false;
5818
5819           if (dump_enabled_p ())
5820             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5821         }
5822
5823       /* Worthwhile without SIMD support?  */
5824       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5825           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5826              < vect_min_worthwhile_factor (code))
5827         {
5828           if (dump_enabled_p ())
5829             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5830                              "not worthwhile without SIMD support.\n");
5831
5832           return false;
5833         }
5834     }
5835
5836   /* 4.2. Check support for the epilog operation.
5837
5838           If STMT represents a reduction pattern, then the type of the
5839           reduction variable may be different than the type of the rest
5840           of the arguments.  For example, consider the case of accumulation
5841           of shorts into an int accumulator; The original code:
5842                         S1: int_a = (int) short_a;
5843           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
5844
5845           was replaced with:
5846                         STMT: int_acc = widen_sum <short_a, int_acc>
5847
5848           This means that:
5849           1. The tree-code that is used to create the vector operation in the
5850              epilog code (that reduces the partial results) is not the
5851              tree-code of STMT, but is rather the tree-code of the original
5852              stmt from the pattern that STMT is replacing.  I.e, in the example
5853              above we want to use 'widen_sum' in the loop, but 'plus' in the
5854              epilog.
5855           2. The type (mode) we use to check available target support
5856              for the vector operation to be created in the *epilog*, is
5857              determined by the type of the reduction variable (in the example
5858              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
5859              However the type (mode) we use to check available target support
5860              for the vector operation to be created *inside the loop*, is
5861              determined by the type of the other arguments to STMT (in the
5862              example we'd check this: optab_handler (widen_sum_optab,
5863              vect_short_mode)).
5864
5865           This is contrary to "regular" reductions, in which the types of all
5866           the arguments are the same as the type of the reduction variable.
5867           For "regular" reductions we can therefore use the same vector type
5868           (and also the same tree-code) when generating the epilog code and
5869           when generating the code inside the loop.  */
5870
5871   if (orig_stmt)
5872     {
5873       /* This is a reduction pattern: get the vectype from the type of the
5874          reduction variable, and get the tree-code from orig_stmt.  */
5875       gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5876                   == TREE_CODE_REDUCTION);
5877       orig_code = gimple_assign_rhs_code (orig_stmt);
5878       gcc_assert (vectype_out);
5879       vec_mode = TYPE_MODE (vectype_out);
5880     }
5881   else
5882     {
5883       /* Regular reduction: use the same vectype and tree-code as used for
5884          the vector code inside the loop can be used for the epilog code. */
5885       orig_code = code;
5886
5887       if (code == MINUS_EXPR)
5888         orig_code = PLUS_EXPR;
5889
5890       /* For simple condition reductions, replace with the actual expression
5891          we want to base our reduction around.  */
5892       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
5893         {
5894           orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
5895           gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
5896         }
5897       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5898                  == INTEGER_INDUC_COND_REDUCTION)
5899         orig_code = MAX_EXPR;
5900     }
5901
5902   if (nested_cycle)
5903     {
5904       def_bb = gimple_bb (reduc_def_stmt);
5905       def_stmt_loop = def_bb->loop_father;
5906       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5907                                        loop_preheader_edge (def_stmt_loop));
5908       if (TREE_CODE (def_arg) == SSA_NAME
5909           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5910           && gimple_code (def_arg_stmt) == GIMPLE_PHI
5911           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5912           && vinfo_for_stmt (def_arg_stmt)
5913           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5914               == vect_double_reduction_def)
5915         double_reduc = true;
5916     }
5917
5918   epilog_reduc_code = ERROR_MARK;
5919
5920   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
5921     {
5922       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5923         {
5924           reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5925                                          optab_default);
5926           if (!reduc_optab)
5927             {
5928               if (dump_enabled_p ())
5929                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5930                                  "no optab for reduction.\n");
5931
5932               epilog_reduc_code = ERROR_MARK;
5933             }
5934           else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5935             {
5936               if (dump_enabled_p ())
5937                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5938                                  "reduc op not supported by target.\n");
5939
5940               epilog_reduc_code = ERROR_MARK;
5941             }
5942
5943           /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5944              generated in the epilog using multiple expressions.  This does not
5945              work for condition reductions.  */
5946           if (epilog_reduc_code == ERROR_MARK
5947               && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5948                         == INTEGER_INDUC_COND_REDUCTION
5949                   || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5950                         == CONST_COND_REDUCTION))
5951             {
5952               if (dump_enabled_p ())
5953                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5954                                  "no reduc code for scalar code.\n");
5955               return false;
5956             }
5957         }
5958       else
5959         {
5960           if (!nested_cycle || double_reduc)
5961             {
5962               if (dump_enabled_p ())
5963                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5964                                  "no reduc code for scalar code.\n");
5965
5966               return false;
5967             }
5968         }
5969     }
5970   else
5971     {
5972       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5973       cr_index_scalar_type = make_unsigned_type (scalar_precision);
5974       cr_index_vector_type = build_vector_type
5975         (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5976
5977       epilog_reduc_code = REDUC_MAX_EXPR;
5978       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5979                                    optab_default);
5980       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5981           == CODE_FOR_nothing)
5982         {
5983           if (dump_enabled_p ())
5984             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5985                              "reduc max op not supported by target.\n");
5986           return false;
5987         }
5988     }
5989
5990   if ((double_reduc
5991        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
5992       && ncopies > 1)
5993     {
5994       if (dump_enabled_p ())
5995         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5996                          "multiple types in double reduction or condition "
5997                          "reduction.\n");
5998       return false;
5999     }
6000
6001   /* In case of widenning multiplication by a constant, we update the type
6002      of the constant to be the type of the other operand.  We check that the
6003      constant fits the type in the pattern recognition pass.  */
6004   if (code == DOT_PROD_EXPR
6005       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
6006     {
6007       if (TREE_CODE (ops[0]) == INTEGER_CST)
6008         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
6009       else if (TREE_CODE (ops[1]) == INTEGER_CST)
6010         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
6011       else
6012         {
6013           if (dump_enabled_p ())
6014             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015                              "invalid types in dot-prod\n");
6016
6017           return false;
6018         }
6019     }
6020
6021   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6022     {
6023       widest_int ni;
6024
6025       if (! max_loop_iterations (loop, &ni))
6026         {
6027           if (dump_enabled_p ())
6028             dump_printf_loc (MSG_NOTE, vect_location,
6029                              "loop count not known, cannot create cond "
6030                              "reduction.\n");
6031           return false;
6032         }
6033       /* Convert backedges to iterations.  */
6034       ni += 1;
6035
6036       /* The additional index will be the same type as the condition.  Check
6037          that the loop can fit into this less one (because we'll use up the
6038          zero slot for when there are no matches).  */
6039       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
6040       if (wi::geu_p (ni, wi::to_widest (max_index)))
6041         {
6042           if (dump_enabled_p ())
6043             dump_printf_loc (MSG_NOTE, vect_location,
6044                              "loop size is greater than data size.\n");
6045           return false;
6046         }
6047     }
6048
6049   if (!vec_stmt) /* transformation not required.  */
6050     {
6051       if (first_p
6052           && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
6053                                          reduc_index))
6054         return false;
6055       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
6056       return true;
6057     }
6058
6059   /** Transform.  **/
6060
6061   if (dump_enabled_p ())
6062     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
6063
6064   /* FORNOW: Multiple types are not supported for condition.  */
6065   if (code == COND_EXPR)
6066     gcc_assert (ncopies == 1);
6067
6068   /* Create the destination vector  */
6069   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6070
6071   /* In case the vectorization factor (VF) is bigger than the number
6072      of elements that we can fit in a vectype (nunits), we have to generate
6073      more than one vector stmt - i.e - we need to "unroll" the
6074      vector stmt by a factor VF/nunits.  For more details see documentation
6075      in vectorizable_operation.  */
6076
6077   /* If the reduction is used in an outer loop we need to generate
6078      VF intermediate results, like so (e.g. for ncopies=2):
6079         r0 = phi (init, r0)
6080         r1 = phi (init, r1)
6081         r0 = x0 + r0;
6082         r1 = x1 + r1;
6083     (i.e. we generate VF results in 2 registers).
6084     In this case we have a separate def-use cycle for each copy, and therefore
6085     for each copy we get the vector def for the reduction variable from the
6086     respective phi node created for this copy.
6087
6088     Otherwise (the reduction is unused in the loop nest), we can combine
6089     together intermediate results, like so (e.g. for ncopies=2):
6090         r = phi (init, r)
6091         r = x0 + r;
6092         r = x1 + r;
6093    (i.e. we generate VF/2 results in a single register).
6094    In this case for each copy we get the vector def for the reduction variable
6095    from the vectorized reduction operation generated in the previous iteration.
6096   */
6097
6098   if (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
6099     {
6100       single_defuse_cycle = true;
6101       epilog_copies = 1;
6102     }
6103   else
6104     epilog_copies = ncopies;
6105
6106   prev_stmt_info = NULL;
6107   prev_phi_info = NULL;
6108   if (slp_node)
6109     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6110   else
6111     {
6112       vec_num = 1;
6113       vec_oprnds0.create (1);
6114       if (op_type == ternary_op)
6115         vec_oprnds1.create (1);
6116     }
6117
6118   phis.create (vec_num);
6119   vect_defs.create (vec_num);
6120   if (!slp_node)
6121     vect_defs.quick_push (NULL_TREE);
6122
6123   for (j = 0; j < ncopies; j++)
6124     {
6125       if (j == 0 || !single_defuse_cycle)
6126         {
6127           for (i = 0; i < vec_num; i++)
6128             {
6129               /* Create the reduction-phi that defines the reduction
6130                  operand.  */
6131               new_phi = create_phi_node (vec_dest, loop->header);
6132               set_vinfo_for_stmt (new_phi,
6133                                   new_stmt_vec_info (new_phi, loop_vinfo));
6134                if (j == 0 || slp_node)
6135                  phis.quick_push (new_phi);
6136             }
6137         }
6138
6139       if (code == COND_EXPR)
6140         {
6141           gcc_assert (!slp_node);
6142           vectorizable_condition (stmt, gsi, vec_stmt,
6143                                   PHI_RESULT (phis[0]),
6144                                   reduc_index, NULL);
6145           /* Multiple types are not supported for condition.  */
6146           break;
6147         }
6148
6149       /* Handle uses.  */
6150       if (j == 0)
6151         {
6152           if (slp_node)
6153             {
6154               /* Get vec defs for all the operands except the reduction index,
6155                 ensuring the ordering of the ops in the vector is kept.  */
6156               auto_vec<tree, 3> slp_ops;
6157               auto_vec<vec<tree>, 3> vec_defs;
6158
6159               slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]);
6160               slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]);
6161               if (op_type == ternary_op)
6162                 slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]);
6163
6164               vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
6165
6166               vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1 : 0]);
6167               if (op_type == ternary_op)
6168                 vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : 2]);
6169             }
6170           else
6171             {
6172               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
6173                                                             stmt);
6174               vec_oprnds0.quick_push (loop_vec_def0);
6175               if (op_type == ternary_op)
6176                {
6177                  op1 = (reduc_index == 0) ? ops[2] : ops[1];
6178                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
6179                  vec_oprnds1.quick_push (loop_vec_def1);
6180                }
6181             }
6182         }
6183       else
6184         {
6185           if (!slp_node)
6186             {
6187               enum vect_def_type dt;
6188               gimple *dummy_stmt;
6189
6190               vect_is_simple_use (ops[!reduc_index], loop_vinfo,
6191                                   &dummy_stmt, &dt);
6192               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
6193                                                               loop_vec_def0);
6194               vec_oprnds0[0] = loop_vec_def0;
6195               if (op_type == ternary_op)
6196                 {
6197                   vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
6198                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
6199                                                                 loop_vec_def1);
6200                   vec_oprnds1[0] = loop_vec_def1;
6201                 }
6202             }
6203
6204           if (single_defuse_cycle)
6205             reduc_def = gimple_assign_lhs (new_stmt);
6206
6207           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
6208         }
6209
6210       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6211         {
6212           if (slp_node)
6213             reduc_def = PHI_RESULT (phis[i]);
6214           else
6215             {
6216               if (!single_defuse_cycle || j == 0)
6217                 reduc_def = PHI_RESULT (new_phi);
6218             }
6219
6220           def1 = ((op_type == ternary_op)
6221                   ? vec_oprnds1[i] : NULL);
6222           if (op_type == binary_op)
6223             {
6224               if (reduc_index == 0)
6225                 expr = build2 (code, vectype_out, reduc_def, def0);
6226               else
6227                 expr = build2 (code, vectype_out, def0, reduc_def);
6228             }
6229           else
6230             {
6231               if (reduc_index == 0)
6232                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
6233               else
6234                 {
6235                   if (reduc_index == 1)
6236                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
6237                   else
6238                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
6239                 }
6240             }
6241
6242           new_stmt = gimple_build_assign (vec_dest, expr);
6243           new_temp = make_ssa_name (vec_dest, new_stmt);
6244           gimple_assign_set_lhs (new_stmt, new_temp);
6245           vect_finish_stmt_generation (stmt, new_stmt, gsi);
6246
6247           if (slp_node)
6248             {
6249               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6250               vect_defs.quick_push (new_temp);
6251             }
6252           else
6253             vect_defs[0] = new_temp;
6254         }
6255
6256       if (slp_node)
6257         continue;
6258
6259       if (j == 0)
6260         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6261       else
6262         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6263
6264       prev_stmt_info = vinfo_for_stmt (new_stmt);
6265       prev_phi_info = vinfo_for_stmt (new_phi);
6266     }
6267
6268   tree indx_before_incr, indx_after_incr, cond_name = NULL;
6269
6270   /* Finalize the reduction-phi (set its arguments) and create the
6271      epilog reduction code.  */
6272   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6273     {
6274       new_temp = gimple_assign_lhs (*vec_stmt);
6275       vect_defs[0] = new_temp;
6276
6277       /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6278          which is updated with the current index of the loop for every match of
6279          the original loop's cond_expr (VEC_STMT).  This results in a vector
6280          containing the last time the condition passed for that vector lane.
6281          The first match will be a 1 to allow 0 to be used for non-matching
6282          indexes.  If there are no matches at all then the vector will be all
6283          zeroes.  */
6284       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6285         {
6286           int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6287           int k;
6288
6289           gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6290
6291           /* First we create a simple vector induction variable which starts
6292              with the values {1,2,3,...} (SERIES_VECT) and increments by the
6293              vector size (STEP).  */
6294
6295           /* Create a {1,2,3,...} vector.  */
6296           tree *vtemp = XALLOCAVEC (tree, nunits_out);
6297           for (k = 0; k < nunits_out; ++k)
6298             vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6299           tree series_vect = build_vector (cr_index_vector_type, vtemp);
6300
6301           /* Create a vector of the step value.  */
6302           tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6303           tree vec_step = build_vector_from_val (cr_index_vector_type, step);
6304
6305           /* Create an induction variable.  */
6306           gimple_stmt_iterator incr_gsi;
6307           bool insert_after;
6308           standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6309           create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6310                      insert_after, &indx_before_incr, &indx_after_incr);
6311
6312           /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6313              filled with zeros (VEC_ZERO).  */
6314
6315           /* Create a vector of 0s.  */
6316           tree zero = build_zero_cst (cr_index_scalar_type);
6317           tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6318
6319           /* Create a vector phi node.  */
6320           tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6321           new_phi = create_phi_node (new_phi_tree, loop->header);
6322           set_vinfo_for_stmt (new_phi,
6323                               new_stmt_vec_info (new_phi, loop_vinfo));
6324           add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6325                        UNKNOWN_LOCATION);
6326
6327           /* Now take the condition from the loops original cond_expr
6328              (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6329              every match uses values from the induction variable
6330              (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6331              (NEW_PHI_TREE).
6332              Finally, we update the phi (NEW_PHI_TREE) to take the value of
6333              the new cond_expr (INDEX_COND_EXPR).  */
6334
6335           /* Duplicate the condition from vec_stmt.  */
6336           tree ccompare = unshare_expr (gimple_assign_rhs1 (*vec_stmt));
6337
6338           /* Create a conditional, where the condition is taken from vec_stmt
6339              (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
6340              else is the phi (NEW_PHI_TREE).  */
6341           tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6342                                          ccompare, indx_before_incr,
6343                                          new_phi_tree);
6344           cond_name = make_ssa_name (cr_index_vector_type);
6345           gimple *index_condition = gimple_build_assign (cond_name,
6346                                                          index_cond_expr);
6347           gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6348           stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6349                                                             loop_vinfo);
6350           STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6351           set_vinfo_for_stmt (index_condition, index_vec_info);
6352
6353           /* Update the phi with the vec cond.  */
6354           add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6355                        UNKNOWN_LOCATION);
6356         }
6357     }
6358
6359   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6360                                     epilog_reduc_code, phis, reduc_index,
6361                                     double_reduc, slp_node, cond_name);
6362
6363   return true;
6364 }
6365
6366 /* Function vect_min_worthwhile_factor.
6367
6368    For a loop where we could vectorize the operation indicated by CODE,
6369    return the minimum vectorization factor that makes it worthwhile
6370    to use generic vectors.  */
6371 int
6372 vect_min_worthwhile_factor (enum tree_code code)
6373 {
6374   switch (code)
6375     {
6376     case PLUS_EXPR:
6377     case MINUS_EXPR:
6378     case NEGATE_EXPR:
6379       return 4;
6380
6381     case BIT_AND_EXPR:
6382     case BIT_IOR_EXPR:
6383     case BIT_XOR_EXPR:
6384     case BIT_NOT_EXPR:
6385       return 2;
6386
6387     default:
6388       return INT_MAX;
6389     }
6390 }
6391
6392
6393 /* Function vectorizable_induction
6394
6395    Check if PHI performs an induction computation that can be vectorized.
6396    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6397    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6398    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6399
6400 bool
6401 vectorizable_induction (gimple *phi,
6402                         gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6403                         gimple **vec_stmt)
6404 {
6405   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6406   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6407   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6408   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6409   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6410   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6411   tree vec_def;
6412
6413   gcc_assert (ncopies >= 1);
6414   /* FORNOW. These restrictions should be relaxed.  */
6415   if (nested_in_vect_loop_p (loop, phi))
6416     {
6417       imm_use_iterator imm_iter;
6418       use_operand_p use_p;
6419       gimple *exit_phi;
6420       edge latch_e;
6421       tree loop_arg;
6422
6423       if (ncopies > 1)
6424         {
6425           if (dump_enabled_p ())
6426             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6427                              "multiple types in nested loop.\n");
6428           return false;
6429         }
6430
6431       exit_phi = NULL;
6432       latch_e = loop_latch_edge (loop->inner);
6433       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6434       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6435         {
6436           gimple *use_stmt = USE_STMT (use_p);
6437           if (is_gimple_debug (use_stmt))
6438             continue;
6439
6440           if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6441             {
6442               exit_phi = use_stmt;
6443               break;
6444             }
6445         }
6446       if (exit_phi)
6447         {
6448           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
6449           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6450                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6451             {
6452               if (dump_enabled_p ())
6453                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6454                                  "inner-loop induction only used outside "
6455                                  "of the outer vectorized loop.\n");
6456               return false;
6457             }
6458         }
6459     }
6460
6461   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6462     return false;
6463
6464   /* FORNOW: SLP not supported.  */
6465   if (STMT_SLP_TYPE (stmt_info))
6466     return false;
6467
6468   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
6469
6470   if (gimple_code (phi) != GIMPLE_PHI)
6471     return false;
6472
6473   if (!vec_stmt) /* transformation not required.  */
6474     {
6475       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6476       if (dump_enabled_p ())
6477         dump_printf_loc (MSG_NOTE, vect_location,
6478                          "=== vectorizable_induction ===\n");
6479       vect_model_induction_cost (stmt_info, ncopies);
6480       return true;
6481     }
6482
6483   /** Transform.  **/
6484
6485   if (dump_enabled_p ())
6486     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6487
6488   vec_def = get_initial_def_for_induction (phi);
6489   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
6490   return true;
6491 }
6492
6493 /* Function vectorizable_live_operation.
6494
6495    STMT computes a value that is used outside the loop.  Check if
6496    it can be supported.  */
6497
6498 bool
6499 vectorizable_live_operation (gimple *stmt,
6500                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6501                              slp_tree slp_node, int slp_index,
6502                              gimple **vec_stmt)
6503 {
6504   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6505   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6506   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6507   imm_use_iterator imm_iter;
6508   tree lhs, lhs_type, bitsize, vec_bitsize;
6509   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6510   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6511   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6512   gimple *use_stmt;
6513   auto_vec<tree> vec_oprnds;
6514
6515   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6516
6517   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6518     return false;
6519
6520   /* FORNOW.  CHECKME.  */
6521   if (nested_in_vect_loop_p (loop, stmt))
6522     return false;
6523
6524   /* If STMT is not relevant and it is a simple assignment and its inputs are
6525      invariant then it can remain in place, unvectorized.  The original last
6526      scalar value that it computes will be used.  */
6527   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6528     {
6529       gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo));
6530       if (dump_enabled_p ())
6531         dump_printf_loc (MSG_NOTE, vect_location,
6532                          "statement is simple and uses invariant.  Leaving in "
6533                          "place.\n");
6534       return true;
6535     }
6536
6537   if (!vec_stmt)
6538     /* No transformation required.  */
6539     return true;
6540
6541   /* If stmt has a related stmt, then use that for getting the lhs.  */
6542   if (is_pattern_stmt_p (stmt_info))
6543     stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6544
6545   lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
6546         : gimple_get_lhs (stmt);
6547   lhs_type = TREE_TYPE (lhs);
6548
6549   bitsize = TYPE_SIZE (TREE_TYPE (vectype));
6550   vec_bitsize = TYPE_SIZE (vectype);
6551
6552   /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
6553   tree vec_lhs, bitstart;
6554   if (slp_node)
6555     {
6556       gcc_assert (slp_index >= 0);
6557
6558       int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6559       int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6560
6561       /* Get the last occurrence of the scalar index from the concatenation of
6562          all the slp vectors. Calculate which slp vector it is and the index
6563          within.  */
6564       int pos = (num_vec * nunits) - num_scalar + slp_index;
6565       int vec_entry = pos / nunits;
6566       int vec_index = pos % nunits;
6567
6568       /* Get the correct slp vectorized stmt.  */
6569       vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
6570
6571       /* Get entry to use.  */
6572       bitstart = build_int_cst (unsigned_type_node, vec_index);
6573       bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
6574     }
6575   else
6576     {
6577       enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
6578       vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt);
6579
6580       /* For multiple copies, get the last copy.  */
6581       for (int i = 1; i < ncopies; ++i)
6582         vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type,
6583                                                   vec_lhs);
6584
6585       /* Get the last lane in the vector.  */
6586       bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
6587     }
6588
6589   /* Create a new vectorized stmt for the uses of STMT and insert outside the
6590      loop.  */
6591   gimple_seq stmts = NULL;
6592   tree new_tree = build3 (BIT_FIELD_REF, TREE_TYPE (vectype), vec_lhs, bitsize,
6593                           bitstart);
6594   new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts,
6595                                    true, NULL_TREE);
6596   if (stmts)
6597     gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
6598
6599   /* Replace use of lhs with newly computed result.  If the use stmt is a
6600      single arg PHI, just replace all uses of PHI result.  It's necessary
6601      because lcssa PHI defining lhs may be before newly inserted stmt.  */
6602   use_operand_p use_p;
6603   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
6604     if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
6605         && !is_gimple_debug (use_stmt))
6606     {
6607       if (gimple_code (use_stmt) == GIMPLE_PHI
6608           && gimple_phi_num_args (use_stmt) == 1)
6609         {
6610           replace_uses_by (gimple_phi_result (use_stmt), new_tree);
6611         }
6612       else
6613         {
6614           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6615             SET_USE (use_p, new_tree);
6616         }
6617       update_stmt (use_stmt);
6618     }
6619
6620   return true;
6621 }
6622
6623 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
6624
6625 static void
6626 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6627 {
6628   ssa_op_iter op_iter;
6629   imm_use_iterator imm_iter;
6630   def_operand_p def_p;
6631   gimple *ustmt;
6632
6633   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6634     {
6635       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6636         {
6637           basic_block bb;
6638
6639           if (!is_gimple_debug (ustmt))
6640             continue;
6641
6642           bb = gimple_bb (ustmt);
6643
6644           if (!flow_bb_inside_loop_p (loop, bb))
6645             {
6646               if (gimple_debug_bind_p (ustmt))
6647                 {
6648                   if (dump_enabled_p ())
6649                     dump_printf_loc (MSG_NOTE, vect_location,
6650                                      "killing debug use\n");
6651
6652                   gimple_debug_bind_reset_value (ustmt);
6653                   update_stmt (ustmt);
6654                 }
6655               else
6656                 gcc_unreachable ();
6657             }
6658         }
6659     }
6660 }
6661
6662 /* Given loop represented by LOOP_VINFO, return true if computation of
6663    LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false
6664    otherwise.  */
6665
6666 static bool
6667 loop_niters_no_overflow (loop_vec_info loop_vinfo)
6668 {
6669   /* Constant case.  */
6670   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6671     {
6672       tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo);
6673       tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
6674
6675       gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST);
6676       gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST);
6677       if (wi::to_widest (cst_nitersm1) < wi::to_widest (cst_niters))
6678         return true;
6679     }
6680
6681   widest_int max;
6682   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6683   /* Check the upper bound of loop niters.  */
6684   if (get_max_loop_iterations (loop, &max))
6685     {
6686       tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
6687       signop sgn = TYPE_SIGN (type);
6688       widest_int type_max = widest_int::from (wi::max_value (type), sgn);
6689       if (max < type_max)
6690         return true;
6691     }
6692   return false;
6693 }
6694
6695 /* Function vect_transform_loop.
6696
6697    The analysis phase has determined that the loop is vectorizable.
6698    Vectorize the loop - created vectorized stmts to replace the scalar
6699    stmts in the loop, and update the loop exit condition.  */
6700
6701 void
6702 vect_transform_loop (loop_vec_info loop_vinfo)
6703 {
6704   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6705   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6706   int nbbs = loop->num_nodes;
6707   int i;
6708   tree niters_vector = NULL;
6709   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6710   bool grouped_store;
6711   bool slp_scheduled = false;
6712   gimple *stmt, *pattern_stmt;
6713   gimple_seq pattern_def_seq = NULL;
6714   gimple_stmt_iterator pattern_def_si = gsi_none ();
6715   bool transform_pattern_stmt = false;
6716   bool check_profitability = false;
6717   int th;
6718   /* Record number of iterations before we started tampering with the profile. */
6719   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
6720
6721   if (dump_enabled_p ())
6722     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6723
6724   /* If profile is inprecise, we have chance to fix it up.  */
6725   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6726     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
6727
6728   /* Use the more conservative vectorization threshold.  If the number
6729      of iterations is constant assume the cost check has been performed
6730      by our caller.  If the threshold makes all loops profitable that
6731      run at least the vectorization factor number of times checking
6732      is pointless, too.  */
6733   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6734   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6735       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6736     {
6737       if (dump_enabled_p ())
6738         dump_printf_loc (MSG_NOTE, vect_location,
6739                          "Profitability threshold is %d loop iterations.\n",
6740                          th);
6741       check_profitability = true;
6742     }
6743
6744   /* Make sure there exists a single-predecessor exit bb.  Do this before
6745      versioning.   */
6746   edge e = single_exit (loop);
6747   if (! single_pred_p (e->dest))
6748     {
6749       split_loop_exit_edge (e);
6750       if (dump_enabled_p ())
6751         dump_printf (MSG_NOTE, "split exit edge\n");
6752     }
6753
6754   /* Version the loop first, if required, so the profitability check
6755      comes first.  */
6756
6757   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
6758     {
6759       vect_loop_versioning (loop_vinfo, th, check_profitability);
6760       check_profitability = false;
6761     }
6762
6763   /* Make sure there exists a single-predecessor exit bb also on the
6764      scalar loop copy.  Do this after versioning but before peeling
6765      so CFG structure is fine for both scalar and if-converted loop
6766      to make slpeel_duplicate_current_defs_from_edges face matched
6767      loop closed PHI nodes on the exit.  */
6768   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
6769     {
6770       e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
6771       if (! single_pred_p (e->dest))
6772         {
6773           split_loop_exit_edge (e);
6774           if (dump_enabled_p ())
6775             dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
6776         }
6777     }
6778
6779   tree niters = vect_build_loop_niters (loop_vinfo);
6780   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
6781   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
6782   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
6783   vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
6784                    check_profitability, niters_no_overflow);
6785   if (niters_vector == NULL_TREE)
6786     {
6787       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6788         niters_vector
6789           = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6790                            LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
6791       else
6792         vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
6793                                      niters_no_overflow);
6794     }
6795
6796   /* 1) Make sure the loop header has exactly two entries
6797      2) Make sure we have a preheader basic block.  */
6798
6799   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6800
6801   split_edge (loop_preheader_edge (loop));
6802
6803   /* FORNOW: the vectorizer supports only loops which body consist
6804      of one basic block (header + empty latch). When the vectorizer will
6805      support more involved loop forms, the order by which the BBs are
6806      traversed need to be reconsidered.  */
6807
6808   for (i = 0; i < nbbs; i++)
6809     {
6810       basic_block bb = bbs[i];
6811       stmt_vec_info stmt_info;
6812
6813       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6814            gsi_next (&si))
6815         {
6816           gphi *phi = si.phi ();
6817           if (dump_enabled_p ())
6818             {
6819               dump_printf_loc (MSG_NOTE, vect_location,
6820                                "------>vectorizing phi: ");
6821               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6822             }
6823           stmt_info = vinfo_for_stmt (phi);
6824           if (!stmt_info)
6825             continue;
6826
6827           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6828             vect_loop_kill_debug_uses (loop, phi);
6829
6830           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6831               && !STMT_VINFO_LIVE_P (stmt_info))
6832             continue;
6833
6834           if (STMT_VINFO_VECTYPE (stmt_info)
6835               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6836                   != (unsigned HOST_WIDE_INT) vf)
6837               && dump_enabled_p ())
6838             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6839
6840           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6841             {
6842               if (dump_enabled_p ())
6843                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6844               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6845             }
6846         }
6847
6848       pattern_stmt = NULL;
6849       for (gimple_stmt_iterator si = gsi_start_bb (bb);
6850            !gsi_end_p (si) || transform_pattern_stmt;)
6851         {
6852           bool is_store;
6853
6854           if (transform_pattern_stmt)
6855             stmt = pattern_stmt;
6856           else
6857             {
6858               stmt = gsi_stmt (si);
6859               /* During vectorization remove existing clobber stmts.  */
6860               if (gimple_clobber_p (stmt))
6861                 {
6862                   unlink_stmt_vdef (stmt);
6863                   gsi_remove (&si, true);
6864                   release_defs (stmt);
6865                   continue;
6866                 }
6867             }
6868
6869           if (dump_enabled_p ())
6870             {
6871               dump_printf_loc (MSG_NOTE, vect_location,
6872                                "------>vectorizing statement: ");
6873               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6874             }
6875
6876           stmt_info = vinfo_for_stmt (stmt);
6877
6878           /* vector stmts created in the outer-loop during vectorization of
6879              stmts in an inner-loop may not have a stmt_info, and do not
6880              need to be vectorized.  */
6881           if (!stmt_info)
6882             {
6883               gsi_next (&si);
6884               continue;
6885             }
6886
6887           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6888             vect_loop_kill_debug_uses (loop, stmt);
6889
6890           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6891               && !STMT_VINFO_LIVE_P (stmt_info))
6892             {
6893               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6894                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6895                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6896                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6897                 {
6898                   stmt = pattern_stmt;
6899                   stmt_info = vinfo_for_stmt (stmt);
6900                 }
6901               else
6902                 {
6903                   gsi_next (&si);
6904                   continue;
6905                 }
6906             }
6907           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6908                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6909                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6910                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6911             transform_pattern_stmt = true;
6912
6913           /* If pattern statement has def stmts, vectorize them too.  */
6914           if (is_pattern_stmt_p (stmt_info))
6915             {
6916               if (pattern_def_seq == NULL)
6917                 {
6918                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6919                   pattern_def_si = gsi_start (pattern_def_seq);
6920                 }
6921               else if (!gsi_end_p (pattern_def_si))
6922                 gsi_next (&pattern_def_si);
6923               if (pattern_def_seq != NULL)
6924                 {
6925                   gimple *pattern_def_stmt = NULL;
6926                   stmt_vec_info pattern_def_stmt_info = NULL;
6927
6928                   while (!gsi_end_p (pattern_def_si))
6929                     {
6930                       pattern_def_stmt = gsi_stmt (pattern_def_si);
6931                       pattern_def_stmt_info
6932                         = vinfo_for_stmt (pattern_def_stmt);
6933                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6934                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6935                         break;
6936                       gsi_next (&pattern_def_si);
6937                     }
6938
6939                   if (!gsi_end_p (pattern_def_si))
6940                     {
6941                       if (dump_enabled_p ())
6942                         {
6943                           dump_printf_loc (MSG_NOTE, vect_location,
6944                                            "==> vectorizing pattern def "
6945                                            "stmt: ");
6946                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6947                                             pattern_def_stmt, 0);
6948                         }
6949
6950                       stmt = pattern_def_stmt;
6951                       stmt_info = pattern_def_stmt_info;
6952                     }
6953                   else
6954                     {
6955                       pattern_def_si = gsi_none ();
6956                       transform_pattern_stmt = false;
6957                     }
6958                 }
6959               else
6960                 transform_pattern_stmt = false;
6961             }
6962
6963           if (STMT_VINFO_VECTYPE (stmt_info))
6964             {
6965               unsigned int nunits
6966                 = (unsigned int)
6967                   TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6968               if (!STMT_SLP_TYPE (stmt_info)
6969                   && nunits != (unsigned int) vf
6970                   && dump_enabled_p ())
6971                   /* For SLP VF is set according to unrolling factor, and not
6972                      to vector size, hence for SLP this print is not valid.  */
6973                 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6974             }
6975
6976           /* SLP. Schedule all the SLP instances when the first SLP stmt is
6977              reached.  */
6978           if (STMT_SLP_TYPE (stmt_info))
6979             {
6980               if (!slp_scheduled)
6981                 {
6982                   slp_scheduled = true;
6983
6984                   if (dump_enabled_p ())
6985                     dump_printf_loc (MSG_NOTE, vect_location,
6986                                      "=== scheduling SLP instances ===\n");
6987
6988                   vect_schedule_slp (loop_vinfo);
6989                 }
6990
6991               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
6992               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6993                 {
6994                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6995                     {
6996                       pattern_def_seq = NULL;
6997                       gsi_next (&si);
6998                     }
6999                   continue;
7000                 }
7001             }
7002
7003           /* -------- vectorize statement ------------ */
7004           if (dump_enabled_p ())
7005             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
7006
7007           grouped_store = false;
7008           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
7009           if (is_store)
7010             {
7011               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7012                 {
7013                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7014                      interleaving chain was completed - free all the stores in
7015                      the chain.  */
7016                   gsi_next (&si);
7017                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
7018                 }
7019               else
7020                 {
7021                   /* Free the attached stmt_vec_info and remove the stmt.  */
7022                   gimple *store = gsi_stmt (si);
7023                   free_stmt_vec_info (store);
7024                   unlink_stmt_vdef (store);
7025                   gsi_remove (&si, true);
7026                   release_defs (store);
7027                 }
7028
7029               /* Stores can only appear at the end of pattern statements.  */
7030               gcc_assert (!transform_pattern_stmt);
7031               pattern_def_seq = NULL;
7032             }
7033           else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7034             {
7035               pattern_def_seq = NULL;
7036               gsi_next (&si);
7037             }
7038         }                       /* stmts in BB */
7039     }                           /* BBs in loop */
7040
7041   slpeel_make_loop_iterate_ntimes (loop, niters_vector);
7042
7043   /* Reduce loop iterations by the vectorization factor.  */
7044   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vf),
7045                       expected_iterations / vf);
7046   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
7047     {
7048       if (loop->nb_iterations_upper_bound != 0)
7049         loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
7050       if (loop->nb_iterations_likely_upper_bound != 0)
7051         loop->nb_iterations_likely_upper_bound
7052            = loop->nb_iterations_likely_upper_bound - 1;
7053     }
7054   loop->nb_iterations_upper_bound
7055     = wi::udiv_floor (loop->nb_iterations_upper_bound + 1, vf) - 1;
7056   loop->nb_iterations_likely_upper_bound
7057     = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + 1, vf) - 1;
7058
7059   if (loop->any_estimate)
7060     {
7061       loop->nb_iterations_estimate
7062         = wi::udiv_floor (loop->nb_iterations_estimate, vf);
7063        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
7064            && loop->nb_iterations_estimate != 0)
7065          loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
7066     }
7067
7068   if (dump_enabled_p ())
7069     {
7070       dump_printf_loc (MSG_NOTE, vect_location,
7071                        "LOOP VECTORIZED\n");
7072       if (loop->inner)
7073         dump_printf_loc (MSG_NOTE, vect_location,
7074                          "OUTER LOOP VECTORIZED\n");
7075       dump_printf (MSG_NOTE, "\n");
7076     }
7077
7078   /* Free SLP instances here because otherwise stmt reference counting
7079      won't work.  */
7080   slp_instance instance;
7081   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
7082     vect_free_slp_instance (instance);
7083   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
7084   /* Clear-up safelen field since its value is invalid after vectorization
7085      since vectorized loop can have loop-carried dependencies.  */
7086   loop->safelen = 0;
7087 }
7088
7089 /* The code below is trying to perform simple optimization - revert
7090    if-conversion for masked stores, i.e. if the mask of a store is zero
7091    do not perform it and all stored value producers also if possible.
7092    For example,
7093      for (i=0; i<n; i++)
7094        if (c[i])
7095         {
7096           p1[i] += 1;
7097           p2[i] = p3[i] +2;
7098         }
7099    this transformation will produce the following semi-hammock:
7100
7101    if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
7102      {
7103        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
7104        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
7105        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
7106        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
7107        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
7108        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
7109      }
7110 */
7111
7112 void
7113 optimize_mask_stores (struct loop *loop)
7114 {
7115   basic_block *bbs = get_loop_body (loop);
7116   unsigned nbbs = loop->num_nodes;
7117   unsigned i;
7118   basic_block bb;
7119   gimple_stmt_iterator gsi;
7120   gimple *stmt;
7121   auto_vec<gimple *> worklist;
7122
7123   vect_location = find_loop_location (loop);
7124   /* Pick up all masked stores in loop if any.  */
7125   for (i = 0; i < nbbs; i++)
7126     {
7127       bb = bbs[i];
7128       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
7129            gsi_next (&gsi))
7130         {
7131           stmt = gsi_stmt (gsi);
7132           if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
7133             worklist.safe_push (stmt);
7134         }
7135     }
7136
7137   free (bbs);
7138   if (worklist.is_empty ())
7139     return;
7140
7141   /* Loop has masked stores.  */
7142   while (!worklist.is_empty ())
7143     {
7144       gimple *last, *last_store;
7145       edge e, efalse;
7146       tree mask;
7147       basic_block store_bb, join_bb;
7148       gimple_stmt_iterator gsi_to;
7149       tree vdef, new_vdef;
7150       gphi *phi;
7151       tree vectype;
7152       tree zero;
7153
7154       last = worklist.pop ();
7155       mask = gimple_call_arg (last, 2);
7156       bb = gimple_bb (last);
7157       /* Create new bb.  */
7158       e = split_block (bb, last);
7159       join_bb = e->dest;
7160       store_bb = create_empty_bb (bb);
7161       add_bb_to_loop (store_bb, loop);
7162       e->flags = EDGE_TRUE_VALUE;
7163       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
7164       /* Put STORE_BB to likely part.  */
7165       efalse->probability = PROB_UNLIKELY;
7166       store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
7167       make_edge (store_bb, join_bb, EDGE_FALLTHRU);
7168       if (dom_info_available_p (CDI_DOMINATORS))
7169         set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
7170       if (dump_enabled_p ())
7171         dump_printf_loc (MSG_NOTE, vect_location,
7172                          "Create new block %d to sink mask stores.",
7173                          store_bb->index);
7174       /* Create vector comparison with boolean result.  */
7175       vectype = TREE_TYPE (mask);
7176       zero = build_zero_cst (vectype);
7177       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
7178       gsi = gsi_last_bb (bb);
7179       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7180       /* Create new PHI node for vdef of the last masked store:
7181          .MEM_2 = VDEF <.MEM_1>
7182          will be converted to
7183          .MEM.3 = VDEF <.MEM_1>
7184          and new PHI node will be created in join bb
7185          .MEM_2 = PHI <.MEM_1, .MEM_3>
7186       */
7187       vdef = gimple_vdef (last);
7188       new_vdef = make_ssa_name (gimple_vop (cfun), last);
7189       gimple_set_vdef (last, new_vdef);
7190       phi = create_phi_node (vdef, join_bb);
7191       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
7192
7193       /* Put all masked stores with the same mask to STORE_BB if possible.  */
7194       while (true)
7195         {
7196           gimple_stmt_iterator gsi_from;
7197           gimple *stmt1 = NULL;
7198
7199           /* Move masked store to STORE_BB.  */
7200           last_store = last;
7201           gsi = gsi_for_stmt (last);
7202           gsi_from = gsi;
7203           /* Shift GSI to the previous stmt for further traversal.  */
7204           gsi_prev (&gsi);
7205           gsi_to = gsi_start_bb (store_bb);
7206           gsi_move_before (&gsi_from, &gsi_to);
7207           /* Setup GSI_TO to the non-empty block start.  */
7208           gsi_to = gsi_start_bb (store_bb);
7209           if (dump_enabled_p ())
7210             {
7211               dump_printf_loc (MSG_NOTE, vect_location,
7212                                "Move stmt to created bb\n");
7213               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
7214             }
7215           /* Move all stored value producers if possible.  */
7216           while (!gsi_end_p (gsi))
7217             {
7218               tree lhs;
7219               imm_use_iterator imm_iter;
7220               use_operand_p use_p;
7221               bool res;
7222
7223               /* Skip debug statements.  */
7224               if (is_gimple_debug (gsi_stmt (gsi)))
7225                 {
7226                   gsi_prev (&gsi);
7227                   continue;
7228                 }
7229               stmt1 = gsi_stmt (gsi);
7230               /* Do not consider statements writing to memory or having
7231                  volatile operand.  */
7232               if (gimple_vdef (stmt1)
7233                   || gimple_has_volatile_ops (stmt1))
7234                 break;
7235               gsi_from = gsi;
7236               gsi_prev (&gsi);
7237               lhs = gimple_get_lhs (stmt1);
7238               if (!lhs)
7239                 break;
7240
7241               /* LHS of vectorized stmt must be SSA_NAME.  */
7242               if (TREE_CODE (lhs) != SSA_NAME)
7243                 break;
7244
7245               if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
7246                 {
7247                   /* Remove dead scalar statement.  */
7248                   if (has_zero_uses (lhs))
7249                     {
7250                       gsi_remove (&gsi_from, true);
7251                       continue;
7252                     }
7253                 }
7254
7255               /* Check that LHS does not have uses outside of STORE_BB.  */
7256               res = true;
7257               FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
7258                 {
7259                   gimple *use_stmt;
7260                   use_stmt = USE_STMT (use_p);
7261                   if (is_gimple_debug (use_stmt))
7262                     continue;
7263                   if (gimple_bb (use_stmt) != store_bb)
7264                     {
7265                       res = false;
7266                       break;
7267                     }
7268                 }
7269               if (!res)
7270                 break;
7271
7272               if (gimple_vuse (stmt1)
7273                   && gimple_vuse (stmt1) != gimple_vuse (last_store))
7274                 break;
7275
7276               /* Can move STMT1 to STORE_BB.  */
7277               if (dump_enabled_p ())
7278                 {
7279                   dump_printf_loc (MSG_NOTE, vect_location,
7280                                    "Move stmt to created bb\n");
7281                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
7282                 }
7283               gsi_move_before (&gsi_from, &gsi_to);
7284               /* Shift GSI_TO for further insertion.  */
7285               gsi_prev (&gsi_to);
7286             }
7287           /* Put other masked stores with the same mask to STORE_BB.  */
7288           if (worklist.is_empty ()
7289               || gimple_call_arg (worklist.last (), 2) != mask
7290               || worklist.last () != stmt1)
7291             break;
7292           last = worklist.pop ();
7293         }
7294       add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
7295     }
7296 }