gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "cfghooks.h"
  31 #include "tree-pass.h"
  32 #include "ssa.h"
  33 #include "optabs-tree.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "cfganal.h"
  38 #include "gimplify.h"
  39 #include "gimple-iterator.h"
  40 #include "gimplify-me.h"
  41 #include "tree-ssa-loop-ivopts.h"
  42 #include "tree-ssa-loop-manip.h"
  43 #include "tree-ssa-loop-niter.h"
  44 #include "tree-ssa-loop.h"
  45 #include "cfgloop.h"
  46 #include "params.h"
  47 #include "tree-scalar-evolution.h"
  48 #include "tree-vectorizer.h"
  49 #include "gimple-fold.h"
  50 #include "cgraph.h"
  51 #include "tree-cfg.h"
  52 #include "tree-if-conv.h"
  53
  54 /* Loop Vectorization Pass.
  55
  56    This pass tries to vectorize loops.
  57
  58    For example, the vectorizer transforms the following simple loop:
  59
  60         short a[N]; short b[N]; short c[N]; int i;
  61
  62         for (i=0; i<N; i++){
  63           a[i] = b[i] + c[i];
  64         }
  65
  66    as if it was manually vectorized by rewriting the source code into:
  67
  68         typedef int __attribute__((mode(V8HI))) v8hi;
  69         short a[N];  short b[N]; short c[N];   int i;
  70         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  71         v8hi va, vb, vc;
  72
  73         for (i=0; i<N/8; i++){
  74           vb = pb[i];
  75           vc = pc[i];
  76           va = vb + vc;
  77           pa[i] = va;
  78         }
  79
  80         The main entry to this pass is vectorize_loops(), in which
  81    the vectorizer applies a set of analyses on a given set of loops,
  82    followed by the actual vectorization transformation for the loops that
  83    had successfully passed the analysis phase.
  84         Throughout this pass we make a distinction between two types of
  85    data: scalars (which are represented by SSA_NAMES), and memory references
  86    ("data-refs").  These two types of data require different handling both
   87    during analysis and transformation.  The types of data-refs that the
   88    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  89    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  90    accesses are required to have a simple (consecutive) access pattern.
  91
  92    Analysis phase:
  93    ===============
  94         The driver for the analysis phase is vect_analyze_loop().
  95    It applies a set of analyses, some of which rely on the scalar evolution
  96    analyzer (scev) developed by Sebastian Pop.
  97
  98         During the analysis phase the vectorizer records some information
  99    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
 100    loop, as well as general information about the loop as a whole, which is
 101    recorded in a "loop_vec_info" struct attached to each loop.
 102
 103    Transformation phase:
 104    =====================
 105         The loop transformation phase scans all the stmts in the loop, and
 106    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 107    the loop that needs to be vectorized.  It inserts the vector code sequence
 108    just before the scalar stmt S, and records a pointer to the vector code
 109    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 110    attached to S).  This pointer will be used for the vectorization of following
 111    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 112    otherwise, we rely on dead code elimination for removing it.
 113
 114         For example, say stmt S1 was vectorized into stmt VS1:
 115
 116    VS1: vb = px[i];
 117    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 118    S2:  a = b;
 119
 120    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 121    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 122    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 123    resulting sequence would be:
 124
 125    VS1: vb = px[i];
 126    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 127    VS2: va = vb;
 128    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 129
 130         Operands that are not SSA_NAMEs, are data-refs that appear in
 131    load/store operations (like 'x[i]' in S1), and are handled differently.
 132
 133    Target modeling:
 134    =================
 135         Currently the only target specific information that is used is the
 136    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 137    Targets that can support different sizes of vectors, for now will need
 138    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 139    flexibility will be added in the future.
 140
  141         Since we only vectorize operations whose vector form can be
  142    expressed using existing tree codes, to verify that an operation is
  143    supported, the vectorizer checks the relevant optab at the relevant
  144    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 145    the value found is CODE_FOR_nothing, then there's no target support, and
 146    we can't vectorize the stmt.
 147
 148    For additional information on this project see:
 149    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 150 */
 151
      /* Forward declaration: lets code earlier in the file reference this
         file-local function ahead of its definition.  */
  152 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 153
  154 /* Function vect_determine_vectorization_factor
  155
  156    Determine the vectorization factor (VF).  VF is the number of data elements
  157    that are operated upon in parallel in a single iteration of the vectorized
  158    loop.  For example, when vectorizing a loop that operates on 4byte elements,
  159    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
  160    elements can fit in a single vector register.
  161
  162    We currently support vectorization of loops in which all types operated upon
  163    are of the same size.  Therefore this function currently sets VF according to
  164    the size of the types operated upon, and fails if there are multiple sizes
  165    in the loop.
  166
  167    VF is also the factor by which the loop iterations are strip-mined, e.g.:
  168    original loop:
  169         for (i=0; i<N; i++){
  170           a[i] = b[i] + c[i];
  171         }
  172
  173    vectorized loop:
  174         for (i=0; i<N; i+=VF){
  175           a[i:VF] = b[i:VF] + c[i:VF];
  176         }

         NOTE(review): the code below does not reject loops with several
         type sizes as such.  It takes the largest number of vector
         subparts seen over all relevant or live PHIs and statements as
         the VF, and only rejects a statement whose vectype and whose
         VF-determining vectype differ in mode size.  The paragraph on
         same-sized types above may predate that; confirm before relying
         on it.

         Returns true on success, after storing the VF in
         LOOP_VINFO_VECT_FACTOR (LOOP_VINFO) and setting
         STMT_VINFO_VECTYPE on PHIs and statements as it goes.  Returns
         false (with a message in the dump when dumping is enabled) if
         some scalar type has no vector type, if a statement without a
         lhs is not a call, if a statement already has a vector mode, if
         the resulting VF is at most 1, or if a mask type cannot be
         computed.
  177 */
  178
  179 static bool
  180 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
  181 {
  182   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  183   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  184   unsigned nbbs = loop->num_nodes;
  185   unsigned int vectorization_factor = 0;
  186   tree scalar_type = NULL_TREE;
  187   gphi *phi;
  188   tree vectype;
  189   unsigned int nunits;
  190   stmt_vec_info stmt_info;
  191   unsigned i;
  192   HOST_WIDE_INT dummy;
  193   gimple *stmt, *pattern_stmt = NULL;
        /* Pattern def sequence currently being walked and the position
           in it; both are reset once the statement iterator advances.  */
  194   gimple_seq pattern_def_seq = NULL;
  195   gimple_stmt_iterator pattern_def_si = gsi_none ();
        /* Set when the pattern stmt of the current stmt still has to be
           analyzed on a following iteration of the statement loop.  */
  196   bool analyze_pattern_stmt = false;
  197   bool bool_result;
        /* Relevant or live stmts with a scalar boolean result; their
           vectype is assigned in the final loop of this function.  */
  198   auto_vec<stmt_vec_info> mask_producers;
  199
  200   if (dump_enabled_p ())
  201     dump_printf_loc (MSG_NOTE, vect_location,
  202                      "=== vect_determine_vectorization_factor ===\n");
  203
  204   for (i = 0; i < nbbs; i++)
  205     {
  206       basic_block bb = bbs[i];
  207
            /* PHIs first: each relevant or live PHI gets a vectype derived
               from the type of its result and contributes the number of
               subparts of that vectype to the VF.  */
  208       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
  209            gsi_next (&si))
  210         {
  211           phi = si.phi ();
  212           stmt_info = vinfo_for_stmt (phi);
  213           if (dump_enabled_p ())
  214             {
  215               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
  216               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
  217             }
  218
  219           gcc_assert (stmt_info);
  220
  221           if (STMT_VINFO_RELEVANT_P (stmt_info)
  222               || STMT_VINFO_LIVE_P (stmt_info))
  223             {
  224               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
  225               scalar_type = TREE_TYPE (PHI_RESULT (phi));
  226
  227               if (dump_enabled_p ())
  228                 {
  229                   dump_printf_loc (MSG_NOTE, vect_location,
  230                                    "get vectype for scalar type:  ");
  231                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  232                   dump_printf (MSG_NOTE, "\n");
  233                 }
  234
  235               vectype = get_vectype_for_scalar_type (scalar_type);
  236               if (!vectype)
  237                 {
  238                   if (dump_enabled_p ())
  239                     {
  240                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  241                                        "not vectorized: unsupported "
  242                                        "data-type ");
  243                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  244                                          scalar_type);
  245                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  246                     }
  247                   return false;
  248                 }
  249               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  250
  251               if (dump_enabled_p ())
  252                 {
  253                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  254                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  255                   dump_printf (MSG_NOTE, "\n");
  256                 }
  257
  258               nunits = TYPE_VECTOR_SUBPARTS (vectype);
  259               if (dump_enabled_p ())
  260                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
  261                                  nunits);
  262
                    /* Keep the largest number of subparts seen so far.  */
  263               if (!vectorization_factor
  264                   || (nunits > vectorization_factor))
  265                 vectorization_factor = nunits;
  266             }
  267         }
  268
            /* Then the statements of the block.  SI is not advanced in the
               loop header: it only moves on once the current stmt, its
               pattern stmt and the relevant stmts of the pattern def
               sequence have been handled, which is why the loop also keeps
               running while ANALYZE_PATTERN_STMT is set.  */
  269       for (gimple_stmt_iterator si = gsi_start_bb (bb);
  270            !gsi_end_p (si) || analyze_pattern_stmt;)
  271         {
  272           tree vf_vectype;
  273
  274           if (analyze_pattern_stmt)
  275             stmt = pattern_stmt;
  276           else
  277             stmt = gsi_stmt (si);
  278
  279           stmt_info = vinfo_for_stmt (stmt);
  280
  281           if (dump_enabled_p ())
  282             {
  283               dump_printf_loc (MSG_NOTE, vect_location,
  284                                "==> examining statement: ");
  285               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  286             }
  287
  288           gcc_assert (stmt_info);
  289
  290           /* Skip stmts which do not need to be vectorized.  */
  291           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
  292                && !STMT_VINFO_LIVE_P (stmt_info))
  293               || gimple_clobber_p (stmt))
  294             {
                    /* The stmt itself is skipped, but if it is part of a
                       pattern whose pattern stmt is relevant or live,
                       analyze that pattern stmt in its place.  */
  295               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  296                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  297                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  298                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  299                 {
  300                   stmt = pattern_stmt;
  301                   stmt_info = vinfo_for_stmt (pattern_stmt);
  302                   if (dump_enabled_p ())
  303                     {
  304                       dump_printf_loc (MSG_NOTE, vect_location,
  305                                        "==> examining pattern statement: ");
  306                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  307                     }
  308                 }
  309               else
  310                 {
  311                   if (dump_enabled_p ())
  312                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
  313                   gsi_next (&si);
  314                   continue;
  315                 }
  316             }
                /* The stmt is analyzed in this iteration; its relevant or
                   live pattern stmt is picked up on the next iteration,
                   since SI is not advanced while ANALYZE_PATTERN_STMT is
                   set.  */
  317           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  318                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  319                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  320                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  321             analyze_pattern_stmt = true;
  322
  323           /* If a pattern statement has def stmts, analyze them too.  */
  324           if (is_pattern_stmt_p (stmt_info))
  325             {
                    /* On the first visit start walking the def sequence;
                       on later visits step past the def stmt that was
                       handled last time.  */
  326               if (pattern_def_seq == NULL)
  327                 {
  328                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
  329                   pattern_def_si = gsi_start (pattern_def_seq);
  330                 }
  331               else if (!gsi_end_p (pattern_def_si))
  332                 gsi_next (&pattern_def_si);
  333               if (pattern_def_seq != NULL)
  334                 {
  335                   gimple *pattern_def_stmt = NULL;
  336                   stmt_vec_info pattern_def_stmt_info = NULL;
  337
                        /* Find the next relevant or live def stmt.  */
  338                   while (!gsi_end_p (pattern_def_si))
  339                     {
  340                       pattern_def_stmt = gsi_stmt (pattern_def_si);
  341                       pattern_def_stmt_info
  342                         = vinfo_for_stmt (pattern_def_stmt);
  343                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
  344                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
  345                         break;
  346                       gsi_next (&pattern_def_si);
  347                     }
  348
                        /* If one was found it is analyzed in this iteration
                           in place of the pattern stmt.  Otherwise the
                           sequence is exhausted and the pattern stmt itself
                           is analyzed now.  */
  349                   if (!gsi_end_p (pattern_def_si))
  350                     {
  351                       if (dump_enabled_p ())
  352                         {
  353                           dump_printf_loc (MSG_NOTE, vect_location,
  354                                            "==> examining pattern def stmt: ");
  355                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
  356                                             pattern_def_stmt, 0);
  357                         }
  358
  359                       stmt = pattern_def_stmt;
  360                       stmt_info = pattern_def_stmt_info;
  361                     }
  362                   else
  363                     {
  364                       pattern_def_si = gsi_none ();
  365                       analyze_pattern_stmt = false;
  366                     }
  367                 }
  368               else
  369                 analyze_pattern_stmt = false;
  370             }
  371
  372           if (gimple_get_lhs (stmt) == NULL_TREE
  373               /* MASK_STORE has no lhs, but is ok.  */
  374               && (!is_gimple_call (stmt)
  375                   || !gimple_call_internal_p (stmt)
  376                   || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
  377             {
  378               if (is_gimple_call (stmt))
  379                 {
  380                   /* Ignore calls with no lhs.  These must be calls to
  381                      #pragma omp simd functions, and what vectorization factor
  382                      it really needs can't be determined until
  383                      vectorizable_simd_clone_call.  */
  384                   if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  385                     {
  386                       pattern_def_seq = NULL;
  387                       gsi_next (&si);
  388                     }
  389                   continue;
  390                 }
  391               if (dump_enabled_p ())
  392                 {
  393                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  394                                    "not vectorized: irregular stmt.");
  395                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  396                                     0);
  397                 }
  398               return false;
  399             }
  400
                /* A stmt whose type already has a vector mode is rejected.  */
  401           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
  402             {
  403               if (dump_enabled_p ())
  404                 {
  405                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  406                                    "not vectorized: vector stmt in loop:");
  407                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
  408                 }
  409               return false;
  410             }
  411
                /* Set below for a non-COND_EXPR assignment with a scalar
                   boolean result.  STMT_VINFO_VECTYPE is then not set in
                   this loop for that stmt.  */
  412           bool_result = false;
  413
  414           if (STMT_VINFO_VECTYPE (stmt_info))
  415             {
  416               /* The only case when a vectype had been already set is for stmts
  417                  that contain a dataref, or for "pattern-stmts" (stmts
  418                  generated by the vectorizer to represent/replace a certain
  419                  idiom).  */
  420               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
  421                           || is_pattern_stmt_p (stmt_info)
  422                           || !gsi_end_p (pattern_def_si));
  423               vectype = STMT_VINFO_VECTYPE (stmt_info);
  424             }
  425           else
  426             {
  427               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
                    /* MASK_STORE has no lhs: use the type of its argument
                       number 3 instead.  */
  428               if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
  429                 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
  430               else
  431                 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
  432
  433               /* Bool ops don't participate in vectorization factor
  434                  computation.  For comparison use compared types to
  435                  compute a factor.  */
  436               if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
  437                   && is_gimple_assign (stmt)
  438                   && gimple_assign_rhs_code (stmt) != COND_EXPR)
  439                 {
  440                   if (STMT_VINFO_RELEVANT_P (stmt_info)
  441                       || STMT_VINFO_LIVE_P (stmt_info))
  442                     mask_producers.safe_push (stmt_info);
  443                   bool_result = true;
  444
  445                   if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
  446                       == tcc_comparison
  447                       && !VECT_SCALAR_BOOLEAN_TYPE_P
  448                             (TREE_TYPE (gimple_assign_rhs1 (stmt))))
  449                     scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  450                   else
  451                     {
                            /* Nothing to contribute to the VF: move on,
                               advancing SI only when no pattern work is
                               pending.  */
  452                       if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  453                         {
  454                           pattern_def_seq = NULL;
  455                           gsi_next (&si);
  456                         }
  457                       continue;
  458                     }
  459                 }
  460
  461               if (dump_enabled_p ())
  462                 {
  463                   dump_printf_loc (MSG_NOTE, vect_location,
  464                                    "get vectype for scalar type:  ");
  465                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  466                   dump_printf (MSG_NOTE, "\n");
  467                 }
  468               vectype = get_vectype_for_scalar_type (scalar_type);
  469               if (!vectype)
  470                 {
  471                   if (dump_enabled_p ())
  472                     {
  473                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  474                                        "not vectorized: unsupported "
  475                                        "data-type ");
  476                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  477                                          scalar_type);
  478                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  479                     }
  480                   return false;
  481                 }
  482
  483               if (!bool_result)
  484                 STMT_VINFO_VECTYPE (stmt_info) = vectype;
  485
  486               if (dump_enabled_p ())
  487                 {
  488                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  489                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  490                   dump_printf (MSG_NOTE, "\n");
  491                 }
  492             }
  493
  494           /* Don't try to compute the VF from scalar types if the stmt
  495              produces a boolean vector.  Use the result vectype instead.  */
  496           if (VECTOR_BOOLEAN_TYPE_P (vectype))
  497             vf_vectype = vectype;
  498           else
  499             {
  500               /* The vectorization factor is according to the smallest
  501                  scalar type (or the largest vector size, but we only
  502                  support one vector size per loop).  */
  503               if (!bool_result)
  504                 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
  505                                                              &dummy);
  506               if (dump_enabled_p ())
  507                 {
  508                   dump_printf_loc (MSG_NOTE, vect_location,
  509                                    "get vectype for scalar type:  ");
  510                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  511                   dump_printf (MSG_NOTE, "\n");
  512                 }
  513               vf_vectype = get_vectype_for_scalar_type (scalar_type);
  514             }
  515           if (!vf_vectype)
  516             {
  517               if (dump_enabled_p ())
  518                 {
  519                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  520                                    "not vectorized: unsupported data-type ");
  521                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  522                                      scalar_type);
  523                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  524                 }
  525               return false;
  526             }
  527
                /* The stmt vectype and the VF-determining vectype must have
                   modes of the same size.  */
  528           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
  529                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
  530             {
  531               if (dump_enabled_p ())
  532                 {
  533                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  534                                    "not vectorized: different sized vector "
  535                                    "types in statement, ");
  536                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  537                                      vectype);
  538                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  539                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  540                                      vf_vectype);
  541                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  542                 }
  543               return false;
  544             }
  545
  546           if (dump_enabled_p ())
  547             {
  548               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  549               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
  550               dump_printf (MSG_NOTE, "\n");
  551             }
  552
  553           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
  554           if (dump_enabled_p ())
  555             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
                /* Keep the largest number of subparts seen so far.  */
  556           if (!vectorization_factor
  557               || (nunits > vectorization_factor))
  558             vectorization_factor = nunits;
  559
                /* Advance to the next stmt of the block only when neither
                   the pattern stmt nor a pattern def stmt is pending.  */
  560           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  561             {
  562               pattern_def_seq = NULL;
  563               gsi_next (&si);
  564             }
  565         }
  566     }
  567
  568   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
  569   if (dump_enabled_p ())
  570     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
  571                      vectorization_factor);
        /* A factor of 0 (nothing contributed) or 1 is treated as failure.  */
  572   if (vectorization_factor <= 1)
  573     {
  574       if (dump_enabled_p ())
  575         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  576                          "not vectorized: unsupported data-type\n");
  577       return false;
  578     }
  579   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  580
        /* Second pass: assign a vectype to every stmt recorded in
           MASK_PRODUCERS.  */
  581   for (i = 0; i < mask_producers.length (); i++)
  582     {
  583       tree mask_type = NULL;
  584
  585       stmt = STMT_VINFO_STMT (mask_producers[i]);
  586
            /* Comparison of non-boolean operands: the mask type follows
               from the type of the first compared operand.  */
  587       if (is_gimple_assign (stmt)
  588           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
  589           && !VECT_SCALAR_BOOLEAN_TYPE_P
  590                                       (TREE_TYPE (gimple_assign_rhs1 (stmt))))
  591         {
  592           scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  593           mask_type = get_mask_type_for_scalar_type (scalar_type);
  594
  595           if (!mask_type)
  596             {
  597               if (dump_enabled_p ())
  598                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  599                                  "not vectorized: unsupported mask\n");
  600               return false;
  601             }
  602         }
  603       else
  604         {
  605           tree rhs;
  606           ssa_op_iter iter;
  607           gimple *def_stmt;
  608           enum vect_def_type dt;
  609
                /* Otherwise derive the mask type from the vectypes reported
                   for the SSA use operands, which must agree with each
                   other in number of subparts and in being boolean vectors
                   or not.  */
  610           FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
  611             {
  612               if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
  613                                        &def_stmt, &dt, &vectype))
  614                 {
  615                   if (dump_enabled_p ())
  616                     {
  617                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  618                                        "not vectorized: can't compute mask type "
  619                                        "for statement, ");
  620                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  621                                         0);
  622                     }
  623                   return false;
  624                 }
  625
  626               /* No vectype probably means external definition.
  627                  Allow it in case there is another operand which
  628                  allows to determine mask type.  */
  629               if (!vectype)
  630                 continue;
  631
  632               if (!mask_type)
  633                 mask_type = vectype;
  634               else if (TYPE_VECTOR_SUBPARTS (mask_type)
  635                        != TYPE_VECTOR_SUBPARTS (vectype))
  636                 {
  637                   if (dump_enabled_p ())
  638                     {
  639                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  640                                        "not vectorized: different sized masks "
  641                                        "types in statement, ");
  642                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  643                                          mask_type);
  644                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  645                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  646                                          vectype);
  647                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  648                     }
  649                   return false;
  650                 }
  651               else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
  652                        != VECTOR_BOOLEAN_TYPE_P (vectype))
  653                 {
  654                   if (dump_enabled_p ())
  655                     {
  656                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  657                                        "not vectorized: mixed mask and "
  658                                        "nonmask vector types in statement, ");
  659                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  660                                          mask_type);
  661                       dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  662                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  663                                          vectype);
  664                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  665                     }
  666                   return false;
  667                 }
  668             }
  669
  670           /* We may compare boolean value loaded as vector of integers.
  671              Fix mask_type in such case.  */
  672           if (mask_type
  673               && !VECTOR_BOOLEAN_TYPE_P (mask_type)
  674               && gimple_code (stmt) == GIMPLE_ASSIGN
  675               && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
  676             mask_type = build_same_sized_truth_vector_type (mask_type);
  677         }
  678
  679       /* No mask_type should mean loop invariant predicate.
  680          This is probably a subject for optimization in
  681          if-conversion.  */
  682       if (!mask_type)
  683         {
  684           if (dump_enabled_p ())
  685             {
  686               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  687                                "not vectorized: can't compute mask type "
  688                                "for statement, ");
  689               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  690                                 0);
  691             }
  692           return false;
  693         }
  694
  695       STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
  696     }
  697
  698   return true;
  699 }
 700
 701
 702 /* Function vect_is_simple_iv_evolution.
 703
 704    FORNOW: A simple evolution of an induction variables in the loop is
 705    considered a polynomial evolution.  */
 706
 707 static bool
 708 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 709                              tree * step)
 710 {
 711   tree init_expr;
 712   tree step_expr;
 713   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 714   basic_block bb;
 715
 716   /* When there is no evolution in this loop, the evolution function
 717      is not "simple".  */
 718   if (evolution_part == NULL_TREE)
 719     return false;
 720
 721   /* When the evolution is a polynomial of degree >= 2
 722      the evolution function is not "simple".  */
 723   if (tree_is_chrec (evolution_part))
 724     return false;
 725
 726   step_expr = evolution_part;
 727   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 728
 729   if (dump_enabled_p ())
 730     {
 731       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 732       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 733       dump_printf (MSG_NOTE, ",  init: ");
 734       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 735       dump_printf (MSG_NOTE, "\n");
 736     }
 737
 738   *init = init_expr;
 739   *step = step_expr;
 740
 741   if (TREE_CODE (step_expr) != INTEGER_CST
 742       && (TREE_CODE (step_expr) != SSA_NAME
 743           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 744               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 745           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 746               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 747                   || !flag_associative_math)))
 748       && (TREE_CODE (step_expr) != REAL_CST
 749           || !flag_associative_math))
 750     {
 751       if (dump_enabled_p ())
 752         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 753                          "step unknown.\n");
 754       return false;
 755     }
 756
 757   return true;
 758 }
 759
 760 /* Function vect_analyze_scalar_cycles_1.
 761
 762    Examine the cross iteration def-use cycles of scalar variables
 763    in LOOP.  LOOP_VINFO represents the loop that is now being
 764    considered for vectorization (can be LOOP, or an outer-loop
 765    enclosing LOOP).  */
 766
 767 static void
 768 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 769 {
 770   basic_block bb = loop->header;
 771   tree init, step;
 772   auto_vec<gimple *, 64> worklist;
 773   gphi_iterator gsi;
 774   bool double_reduc;
 775
 776   if (dump_enabled_p ())
 777     dump_printf_loc (MSG_NOTE, vect_location,
 778                      "=== vect_analyze_scalar_cycles ===\n");
 779
 780   /* First - identify all inductions.  Reduction detection assumes that all the
 781      inductions have been identified, therefore, this order must not be
 782      changed.  */
 783   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 784     {
 785       gphi *phi = gsi.phi ();
 786       tree access_fn = NULL;
 787       tree def = PHI_RESULT (phi);
 788       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 789
 790       if (dump_enabled_p ())
 791         {
 792           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 793           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 794         }
 795
 796       /* Skip virtual phi's.  The data dependences that are associated with
 797          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 798       if (virtual_operand_p (def))
 799         continue;
 800
 801       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 802
 803       /* Analyze the evolution function.  */
 804       access_fn = analyze_scalar_evolution (loop, def);
 805       if (access_fn)
 806         {
 807           STRIP_NOPS (access_fn);
 808           if (dump_enabled_p ())
 809             {
 810               dump_printf_loc (MSG_NOTE, vect_location,
 811                                "Access function of PHI: ");
 812               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 813               dump_printf (MSG_NOTE, "\n");
 814             }
 815           STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 816             = initial_condition_in_loop_num (access_fn, loop->num);
 817           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 818             = evolution_part_in_loop_num (access_fn, loop->num);
 819         }
 820
 821       if (!access_fn
 822           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 823           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 824               && TREE_CODE (step) != INTEGER_CST))
 825         {
 826           worklist.safe_push (phi);
 827           continue;
 828         }
 829
 830       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
 831                   != NULL_TREE);
 832       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 833
 834       if (dump_enabled_p ())
 835         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 836       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 837     }
 838
 839
 840   /* Second - identify all reductions and nested cycles.  */
 841   while (worklist.length () > 0)
 842     {
 843       gimple *phi = worklist.pop ();
 844       tree def = PHI_RESULT (phi);
 845       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 846       gimple *reduc_stmt;
 847
 848       if (dump_enabled_p ())
 849         {
 850           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 851           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 852         }
 853
 854       gcc_assert (!virtual_operand_p (def)
 855                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 856
 857       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi,
 858                                                 &double_reduc, false);
 859       if (reduc_stmt)
 860         {
 861           if (double_reduc)
 862             {
 863               if (dump_enabled_p ())
 864                 dump_printf_loc (MSG_NOTE, vect_location,
 865                                  "Detected double reduction.\n");
 866
 867               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 868               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 869                                                     vect_double_reduction_def;
 870             }
 871           else
 872             {
 873               if (loop != LOOP_VINFO_LOOP (loop_vinfo))
 874                 {
 875                   if (dump_enabled_p ())
 876                     dump_printf_loc (MSG_NOTE, vect_location,
 877                                      "Detected vectorizable nested cycle.\n");
 878
 879                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 880                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 881                                                              vect_nested_cycle;
 882                 }
 883               else
 884                 {
 885                   if (dump_enabled_p ())
 886                     dump_printf_loc (MSG_NOTE, vect_location,
 887                                      "Detected reduction.\n");
 888
 889                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 890                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 891                                                            vect_reduction_def;
 892                   /* Store the reduction cycles for possible vectorization in
 893                      loop-aware SLP.  */
 894                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 895                 }
 896             }
 897         }
 898       else
 899         if (dump_enabled_p ())
 900           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 901                            "Unknown def-use cycle pattern.\n");
 902     }
 903 }
 904
 905
 906 /* Function vect_analyze_scalar_cycles.
 907
 908    Examine the cross iteration def-use cycles of scalar variables, by
 909    analyzing the loop-header PHIs of scalar variables.  Classify each
 910    cycle as one of the following: invariant, induction, reduction, unknown.
 911    We do that for the loop represented by LOOP_VINFO, and also to its
 912    inner-loop, if exists.
 913    Examples for scalar cycles:
 914
 915    Example1: reduction:
 916
 917               loop1:
 918               for (i=0; i<N; i++)
 919                  sum += a[i];
 920
 921    Example2: induction:
 922
 923               loop2:
 924               for (i=0; i<N; i++)
 925                  a[i] = i;  */
 926
 927 static void
 928 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 929 {
 930   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 931
 932   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 933
 934   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 935      Reductions in such inner-loop therefore have different properties than
 936      the reductions in the nest that gets vectorized:
 937      1. When vectorized, they are executed in the same order as in the original
 938         scalar loop, so we can't change the order of computation when
 939         vectorizing them.
 940      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 941         current checks are too strict.  */
 942
 943   if (loop->inner)
 944     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 945 }
 946
 947 /* Transfer group and reduction information from STMT to its pattern stmt.  */
 948
 949 static void
 950 vect_fixup_reduc_chain (gimple *stmt)
 951 {
 952   gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 953   gimple *stmtp;
 954   gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
 955               && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
 956   GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
 957   do
 958     {
 959       stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 960       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
 961       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
 962       if (stmt)
 963         GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
 964           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 965     }
 966   while (stmt);
 967   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
 968 }
 969
 970 /* Fixup scalar cycles that now have their stmts detected as patterns.  */
 971
 972 static void
 973 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
 974 {
 975   gimple *first;
 976   unsigned i;
 977
 978   FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
 979     if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
 980       {
 981         gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first));
 982         while (next)
 983           {
 984             if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)))
 985               break;
 986             next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
 987           }
 988         /* If not all stmt in the chain are patterns try to handle
 989            the chain without patterns.  */
 990         if (! next)
 991           {
 992             vect_fixup_reduc_chain (first);
 993             LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
 994               = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
 995           }
 996       }
 997 }
 998
 999 /* Function vect_get_loop_niters.
1000
1001    Determine how many iterations the loop is executed and place it
1002    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
1003    in NUMBER_OF_ITERATIONSM1.  Place the condition under which the
1004    niter information holds in ASSUMPTIONS.
1005
1006    Return the loop exit condition.  */
1007
1008
1009 static gcond *
1010 vect_get_loop_niters (struct loop *loop, tree *assumptions,
1011                       tree *number_of_iterations, tree *number_of_iterationsm1)
1012 {
1013   edge exit = single_exit (loop);
1014   struct tree_niter_desc niter_desc;
1015   tree niter_assumptions, niter, may_be_zero;
1016   gcond *cond = get_loop_exit_condition (loop);
1017
1018   *assumptions = boolean_true_node;
1019   *number_of_iterationsm1 = chrec_dont_know;
1020   *number_of_iterations = chrec_dont_know;
1021   if (dump_enabled_p ())
1022     dump_printf_loc (MSG_NOTE, vect_location,
1023                      "=== get_loop_niters ===\n");
1024
1025   if (!exit)
1026     return cond;
1027
1028   niter = chrec_dont_know;
1029   may_be_zero = NULL_TREE;
1030   niter_assumptions = boolean_true_node;
1031   if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
1032       || chrec_contains_undetermined (niter_desc.niter))
1033     return cond;
1034
1035   niter_assumptions = niter_desc.assumptions;
1036   may_be_zero = niter_desc.may_be_zero;
1037   niter = niter_desc.niter;
1038
1039   if (may_be_zero && integer_zerop (may_be_zero))
1040     may_be_zero = NULL_TREE;
1041
1042   if (may_be_zero)
1043     {
1044       if (COMPARISON_CLASS_P (may_be_zero))
1045         {
1046           /* Try to combine may_be_zero with assumptions, this can simplify
1047              computation of niter expression.  */
1048           if (niter_assumptions && !integer_nonzerop (niter_assumptions))
1049             niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1050                                              niter_assumptions,
1051                                              fold_build1 (TRUTH_NOT_EXPR,
1052                                                           boolean_type_node,
1053                                                           may_be_zero));
1054           else
1055             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
1056                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1057
1058           may_be_zero = NULL_TREE;
1059         }
1060       else if (integer_nonzerop (may_be_zero))
1061         {
1062           *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
1063           *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
1064           return cond;
1065         }
1066       else
1067         return cond;
1068     }
1069
1070   *assumptions = niter_assumptions;
1071   *number_of_iterationsm1 = niter;
1072
1073   /* We want the number of loop header executions which is the number
1074      of latch executions plus one.
1075      ???  For UINT_MAX latch executions this number overflows to zero
1076      for loops like do { n++; } while (n != 0);  */
1077   if (niter && !chrec_contains_undetermined (niter))
1078     niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
1079                           build_int_cst (TREE_TYPE (niter), 1));
1080   *number_of_iterations = niter;
1081
1082   return cond;
1083 }
1084
1085 /* Function bb_in_loop_p
1086
1087    Used as predicate for dfs order traversal of the loop bbs.  */
1088
1089 static bool
1090 bb_in_loop_p (const_basic_block bb, const void *data)
1091 {
1092   const struct loop *const loop = (const struct loop *)data;
1093   if (flow_bb_inside_loop_p (loop, bb))
1094     return true;
1095   return false;
1096 }
1097
1098
1099 /* Function new_loop_vec_info.
1100
1101    Create and initialize a new loop_vec_info struct for LOOP, as well as
1102    stmt_vec_info structs for all the stmts in LOOP.  */
1103
1104 static loop_vec_info
1105 new_loop_vec_info (struct loop *loop)
1106 {
1107   loop_vec_info res;
1108   basic_block *bbs;
1109   gimple_stmt_iterator si;
1110   unsigned int i, nbbs;
1111
1112   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1113   res->kind = vec_info::loop;
1114   LOOP_VINFO_LOOP (res) = loop;
1115
1116   bbs = get_loop_body (loop);
1117
1118   /* Create/Update stmt_info for all stmts in the loop.  */
1119   for (i = 0; i < loop->num_nodes; i++)
1120     {
1121       basic_block bb = bbs[i];
1122
1123       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1124         {
1125           gimple *phi = gsi_stmt (si);
1126           gimple_set_uid (phi, 0);
1127           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
1128         }
1129
1130       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1131         {
1132           gimple *stmt = gsi_stmt (si);
1133           gimple_set_uid (stmt, 0);
1134           set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
1135         }
1136     }
1137
1138   /* CHECKME: We want to visit all BBs before their successors (except for
1139      latch blocks, for which this assertion wouldn't hold).  In the simple
1140      case of the loop forms we allow, a dfs order of the BBs would the same
1141      as reversed postorder traversal, so we are safe.  */
1142
1143    free (bbs);
1144    bbs = XCNEWVEC (basic_block, loop->num_nodes);
1145    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1146                               bbs, loop->num_nodes, loop);
1147    gcc_assert (nbbs == loop->num_nodes);
1148
1149   LOOP_VINFO_BBS (res) = bbs;
1150   LOOP_VINFO_NITERSM1 (res) = NULL;
1151   LOOP_VINFO_NITERS (res) = NULL;
1152   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
1153   LOOP_VINFO_NITERS_ASSUMPTIONS (res) = NULL;
1154   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
1155   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1156   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
1157   LOOP_VINFO_VECT_FACTOR (res) = 0;
1158   LOOP_VINFO_LOOP_NEST (res) = vNULL;
1159   LOOP_VINFO_DATAREFS (res) = vNULL;
1160   LOOP_VINFO_DDRS (res) = vNULL;
1161   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1162   LOOP_VINFO_MAY_MISALIGN_STMTS (res) = vNULL;
1163   LOOP_VINFO_MAY_ALIAS_DDRS (res) = vNULL;
1164   LOOP_VINFO_GROUPED_STORES (res) = vNULL;
1165   LOOP_VINFO_REDUCTIONS (res) = vNULL;
1166   LOOP_VINFO_REDUCTION_CHAINS (res) = vNULL;
1167   LOOP_VINFO_SLP_INSTANCES (res) = vNULL;
1168   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1169   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1170   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1171   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1172   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1173   LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
1174
1175   return res;
1176 }
1177
1178
1179 /* Function destroy_loop_vec_info.
1180
1181    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1182    stmts in the loop.  */
1183
1184 void
1185 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1186 {
1187   struct loop *loop;
1188   basic_block *bbs;
1189   int nbbs;
1190   gimple_stmt_iterator si;
1191   int j;
1192   vec<slp_instance> slp_instances;
1193   slp_instance instance;
1194   bool swapped;
1195
1196   if (!loop_vinfo)
1197     return;
1198
1199   loop = LOOP_VINFO_LOOP (loop_vinfo);
1200
1201   bbs = LOOP_VINFO_BBS (loop_vinfo);
1202   nbbs = clean_stmts ? loop->num_nodes : 0;
1203   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1204
1205   for (j = 0; j < nbbs; j++)
1206     {
1207       basic_block bb = bbs[j];
1208       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1209         free_stmt_vec_info (gsi_stmt (si));
1210
1211       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1212         {
1213           gimple *stmt = gsi_stmt (si);
1214
1215           /* We may have broken canonical form by moving a constant
1216              into RHS1 of a commutative op.  Fix such occurrences.  */
1217           if (swapped && is_gimple_assign (stmt))
1218             {
1219               enum tree_code code = gimple_assign_rhs_code (stmt);
1220
1221               if ((code == PLUS_EXPR
1222                    || code == POINTER_PLUS_EXPR
1223                    || code == MULT_EXPR)
1224                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1225                 swap_ssa_operands (stmt,
1226                                    gimple_assign_rhs1_ptr (stmt),
1227                                    gimple_assign_rhs2_ptr (stmt));
1228               else if (code == COND_EXPR
1229                        && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
1230                 {
1231                   tree cond_expr = gimple_assign_rhs1 (stmt);
1232                   enum tree_code cond_code = TREE_CODE (cond_expr);
1233
1234                   if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1235                     {
1236                       bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr,
1237                                                                   0));
1238                       cond_code = invert_tree_comparison (cond_code,
1239                                                           honor_nans);
1240                       if (cond_code != ERROR_MARK)
1241                         {
1242                           TREE_SET_CODE (cond_expr, cond_code);
1243                           swap_ssa_operands (stmt,
1244                                              gimple_assign_rhs2_ptr (stmt),
1245                                              gimple_assign_rhs3_ptr (stmt));
1246                         }
1247                     }
1248                 }
1249             }
1250
1251           /* Free stmt_vec_info.  */
1252           free_stmt_vec_info (stmt);
1253           gsi_next (&si);
1254         }
1255     }
1256
1257   free (LOOP_VINFO_BBS (loop_vinfo));
1258   vect_destroy_datarefs (loop_vinfo);
1259   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1260   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1261   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1262   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
1263   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1264   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1265   FOR_EACH_VEC_ELT (slp_instances, j, instance)
1266     vect_free_slp_instance (instance);
1267
1268   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1269   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1270   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1271   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1272
1273   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1274   loop_vinfo->scalar_cost_vec.release ();
1275
1276   free (loop_vinfo);
1277   loop->aux = NULL;
1278 }
1279
1280
1281 /* Calculate the cost of one scalar iteration of the loop.  */
1282 static void
1283 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1284 {
1285   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1286   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1287   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1288   int innerloop_iters, i;
1289
1290   /* Count statements in scalar loop.  Using this as scalar cost for a single
1291      iteration for now.
1292
1293      TODO: Add outer loop support.
1294
1295      TODO: Consider assigning different costs to different scalar
1296      statements.  */
1297
1298   /* FORNOW.  */
1299   innerloop_iters = 1;
1300   if (loop->inner)
1301     innerloop_iters = 50; /* FIXME */
1302
1303   for (i = 0; i < nbbs; i++)
1304     {
1305       gimple_stmt_iterator si;
1306       basic_block bb = bbs[i];
1307
1308       if (bb->loop_father == loop->inner)
1309         factor = innerloop_iters;
1310       else
1311         factor = 1;
1312
1313       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1314         {
1315           gimple *stmt = gsi_stmt (si);
1316           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1317
1318           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1319             continue;
1320
1321           /* Skip stmts that are not vectorized inside the loop.  */
1322           if (stmt_info
1323               && !STMT_VINFO_RELEVANT_P (stmt_info)
1324               && (!STMT_VINFO_LIVE_P (stmt_info)
1325                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1326               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1327             continue;
1328
1329           vect_cost_for_stmt kind;
1330           if (STMT_VINFO_DATA_REF (stmt_info))
1331             {
1332               if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1333                kind = scalar_load;
1334              else
1335                kind = scalar_store;
1336             }
1337           else
1338             kind = scalar_stmt;
1339
1340           scalar_single_iter_cost
1341             += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1342                                  factor, kind, stmt_info, 0, vect_prologue);
1343         }
1344     }
1345   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1346     = scalar_single_iter_cost;
1347 }
1348
1349
1350 /* Function vect_analyze_loop_form_1.
1351
1352    Verify that certain CFG restrictions hold, including:
1353    - the loop has a pre-header
1354    - the loop has a single entry and exit
1355    - the loop exit condition is simple enough
1356    - the number of iterations can be analyzed, i.e, a countable loop.  The
1357      niter could be analyzed under some assumptions.  */
1358
1359 bool
1360 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1361                           tree *assumptions, tree *number_of_iterationsm1,
1362                           tree *number_of_iterations, gcond **inner_loop_cond)
1363 {
1364   if (dump_enabled_p ())
1365     dump_printf_loc (MSG_NOTE, vect_location,
1366                      "=== vect_analyze_loop_form ===\n");
1367
1368   /* Different restrictions apply when we are considering an inner-most loop,
1369      vs. an outer (nested) loop.
1370      (FORNOW. May want to relax some of these restrictions in the future).  */
1371
1372   if (!loop->inner)
1373     {
1374       /* Inner-most loop.  We currently require that the number of BBs is
1375          exactly 2 (the header and latch).  Vectorizable inner-most loops
1376          look like this:
1377
1378                         (pre-header)
1379                            |
1380                           header <--------+
1381                            | |            |
1382                            | +--> latch --+
1383                            |
1384                         (exit-bb)  */
1385
1386       if (loop->num_nodes != 2)
1387         {
1388           if (dump_enabled_p ())
1389             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1390                              "not vectorized: control flow in loop.\n");
1391           return false;
1392         }
1393
1394       if (empty_block_p (loop->header))
1395         {
1396           if (dump_enabled_p ())
1397             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1398                              "not vectorized: empty loop.\n");
1399           return false;
1400         }
1401     }
1402   else
1403     {
1404       struct loop *innerloop = loop->inner;
1405       edge entryedge;
1406
1407       /* Nested loop. We currently require that the loop is doubly-nested,
1408          contains a single inner loop, and the number of BBs is exactly 5.
1409          Vectorizable outer-loops look like this:
1410
1411                         (pre-header)
1412                            |
1413                           header <---+
1414                            |         |
1415                           inner-loop |
1416                            |         |
1417                           tail ------+
1418                            |
1419                         (exit-bb)
1420
1421          The inner-loop has the properties expected of inner-most loops
1422          as described above.  */
1423
1424       if ((loop->inner)->inner || (loop->inner)->next)
1425         {
1426           if (dump_enabled_p ())
1427             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1428                              "not vectorized: multiple nested loops.\n");
1429           return false;
1430         }
1431
1432       if (loop->num_nodes != 5)
1433         {
1434           if (dump_enabled_p ())
1435             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1436                              "not vectorized: control flow in loop.\n");
1437           return false;
1438         }
1439
1440       entryedge = loop_preheader_edge (innerloop);
1441       if (entryedge->src != loop->header
1442           || !single_exit (innerloop)
1443           || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1444         {
1445           if (dump_enabled_p ())
1446             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1447                              "not vectorized: unsupported outerloop form.\n");
1448           return false;
1449         }
1450
1451       /* Analyze the inner-loop.  */
1452       tree inner_niterm1, inner_niter, inner_assumptions;
1453       if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1454                                       &inner_assumptions, &inner_niterm1,
1455                                       &inner_niter, NULL)
1456           /* Don't support analyzing niter under assumptions for inner
1457              loop.  */
1458           || !integer_onep (inner_assumptions))
1459         {
1460           if (dump_enabled_p ())
1461             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1462                              "not vectorized: Bad inner loop.\n");
1463           return false;
1464         }
1465
1466       if (!expr_invariant_in_loop_p (loop, inner_niter))
1467         {
1468           if (dump_enabled_p ())
1469             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1470                              "not vectorized: inner-loop count not"
1471                              " invariant.\n");
1472           return false;
1473         }
1474
1475       if (dump_enabled_p ())
1476         dump_printf_loc (MSG_NOTE, vect_location,
1477                          "Considering outer-loop vectorization.\n");
1478     }
1479
1480   if (!single_exit (loop)
1481       || EDGE_COUNT (loop->header->preds) != 2)
1482     {
1483       if (dump_enabled_p ())
1484         {
1485           if (!single_exit (loop))
1486             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1487                              "not vectorized: multiple exits.\n");
1488           else if (EDGE_COUNT (loop->header->preds) != 2)
1489             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1490                              "not vectorized: too many incoming edges.\n");
1491         }
1492       return false;
1493     }
1494
1495   /* We assume that the loop exit condition is at the end of the loop. i.e,
1496      that the loop is represented as a do-while (with a proper if-guard
1497      before the loop if needed), where the loop header contains all the
1498      executable statements, and the latch is empty.  */
1499   if (!empty_block_p (loop->latch)
1500       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1501     {
1502       if (dump_enabled_p ())
1503         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1504                          "not vectorized: latch block not empty.\n");
1505       return false;
1506     }
1507
1508   /* Make sure the exit is not abnormal.  */
1509   edge e = single_exit (loop);
1510   if (e->flags & EDGE_ABNORMAL)
1511     {
1512       if (dump_enabled_p ())
1513         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1514                          "not vectorized: abnormal loop exit edge.\n");
1515       return false;
1516     }
1517
1518   *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
1519                                      number_of_iterationsm1);
1520   if (!*loop_cond)
1521     {
1522       if (dump_enabled_p ())
1523         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1524                          "not vectorized: complicated exit condition.\n");
1525       return false;
1526     }
1527
1528   if (integer_zerop (*assumptions)
1529       || !*number_of_iterations
1530       || chrec_contains_undetermined (*number_of_iterations))
1531     {
1532       if (dump_enabled_p ())
1533         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1534                          "not vectorized: number of iterations cannot be "
1535                          "computed.\n");
1536       return false;
1537     }
1538
1539   if (integer_zerop (*number_of_iterations))
1540     {
1541       if (dump_enabled_p ())
1542         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1543                          "not vectorized: number of iterations = 0.\n");
1544       return false;
1545     }
1546
1547   return true;
1548 }
1549
1550 /* Check that LOOP has a form the vectorizer can handle.  If so, create its
1551    loop_vec_info, record the iteration counts computed by the form check,
1552    attach it to LOOP->aux and return it; otherwise return NULL.  */
1553
1554 loop_vec_info
1555 vect_analyze_loop_form (struct loop *loop)
1556 {
1557   gcond *exit_cond, *inner_exit_cond = NULL;
1558   tree niters, nitersm1, niter_assumptions;
1559
1560   bool form_ok
1561     = vect_analyze_loop_form_1 (loop, &exit_cond, &niter_assumptions,
1562                                 &nitersm1, &niters, &inner_exit_cond);
1563   if (!form_ok)
1564     return NULL;
1565
1566   loop_vec_info vinfo = new_loop_vec_info (loop);
1567   LOOP_VINFO_NITERS (vinfo) = niters;
1568   LOOP_VINFO_NITERS_UNCHANGED (vinfo) = niters;
1569   LOOP_VINFO_NITERSM1 (vinfo) = nitersm1;
1570
1571   if (!integer_onep (niter_assumptions))
1572     {
1573       /* The loop can only be vectorized by versioning it under these
1574          assumptions.  Clear what scev and the niter analyzer computed.  */
1575       scev_reset_htab ();
1576       free_numbers_of_iterations_estimates_loop (loop);
1577       /* Flag the loop so that later scev and niter analysis is done under
1578          the assumptions, and record them for versioning.  */
1579       loop_constraint_set (loop, LOOP_C_FINITE);
1580       LOOP_VINFO_NITERS_ASSUMPTIONS (vinfo) = niter_assumptions;
1581     }
1582
1583   if (!LOOP_VINFO_NITERS_KNOWN_P (vinfo) && dump_enabled_p ())
1584     {
1585       dump_printf_loc (MSG_NOTE, vect_location,
1586                        "Symbolic number of iterations is ");
1587       dump_generic_expr (MSG_NOTE, TDF_DETAILS, niters);
1588       dump_printf (MSG_NOTE, "\n");
1589     }
1590
1591   /* Give the exit condition(s) the loop-exit-control stmt type.  */
1592   STMT_VINFO_TYPE (vinfo_for_stmt (exit_cond)) = loop_exit_ctrl_vec_info_type;
1593   if (inner_exit_cond)
1594     STMT_VINFO_TYPE (vinfo_for_stmt (inner_exit_cond))
1595       = loop_exit_ctrl_vec_info_type;
1596
1597   gcc_assert (!loop->aux);
1598   loop->aux = vinfo;
1599   return vinfo;
1600 }
1601
1602
1603
1604 /* Update the vectorization factor of LOOP_VINFO once the SLP decision has
1605    been made.  If every relevant statement (or vectorizable cycle def) in
1606    the loop is pure SLP, the factor becomes the SLP unrolling factor;
1607    otherwise it becomes the least common multiple of the current factor
1608    and the SLP unrolling factor.  */
1609
1610 static void
1611 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1612 {
1613   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1614   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1615   int nbbs = loop->num_nodes;
1616
1617   if (dump_enabled_p ())
1618     dump_printf_loc (MSG_NOTE, vect_location,
1619                      "=== vect_update_vf_for_slp ===\n");
1620
1621   unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1622   gcc_assert (vectorization_factor != 0);
1623
1624   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1625      vectorization factor of the loop is the unrolling factor required by
1626      the SLP instances.  If that unrolling factor is 1, we say, that we
1627      perform pure SLP on loop - cross iteration parallelism is not
1628      exploited.  The scan stops at the first stmt that is not pure SLP,
1629      since nothing after it can change the answer.  */
1630   bool only_slp_in_loop = true;
1631   for (int i = 0; only_slp_in_loop && i < nbbs; i++)
1632     for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
1633          only_slp_in_loop && !gsi_end_p (si); gsi_next (&si))
1634       {
1635         stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
1636         /* If the stmt is part of a pattern, examine its related stmt
1637            instead.  */
1638         if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1639             && STMT_VINFO_RELATED_STMT (stmt_info))
1640           stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1641
1642         /* This stmt needs loop-based vectorization as well.  */
1643         if ((STMT_VINFO_RELEVANT_P (stmt_info)
1644              || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1645             && !PURE_SLP_STMT (stmt_info))
1646           only_slp_in_loop = false;
1647       }
1648
1649   if (only_slp_in_loop)
1650     {
1651       if (dump_enabled_p ())
1652         dump_printf_loc (MSG_NOTE, vect_location,
1653                          "Loop contains only SLP stmts\n");
1654       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1655     }
1656   else
1657     {
1658       if (dump_enabled_p ())
1659         dump_printf_loc (MSG_NOTE, vect_location,
1660                          "Loop contains SLP and non-SLP stmts\n");
1661       vectorization_factor
1662         = least_common_multiple (vectorization_factor,
1663                                  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1664     }
1665
1666   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1667   if (dump_enabled_p ())
1668     dump_printf_loc (MSG_NOTE, vect_location,
1669                      "Updating vectorization factor to %d\n",
1670                      vectorization_factor);
1671 }
1672
1673 /* Function vect_analyze_loop_operations.  Scan the phis and stmts of the
1674    loop described by LOOP_VINFO and make sure they are all vectorizable.
1675    Return false if one is unsupported or if nothing needs vectorizing.  */
1676
1677 static bool
1678 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1679 {
1680   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1681   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1682   int nbbs = loop->num_nodes;
1683   int i;
1684   stmt_vec_info stmt_info;
1685   bool need_to_vectorize = false;
1686   bool ok;
1687
1688   if (dump_enabled_p ())
1689     dump_printf_loc (MSG_NOTE, vect_location,
1690                      "=== vect_analyze_loop_operations ===\n");
1691   /* For each block, check its phis first and then its ordinary stmts.  */
1692   for (i = 0; i < nbbs; i++)
1693     {
1694       basic_block bb = bbs[i];
1695
1696       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1697            gsi_next (&si))
1698         {
1699           gphi *phi = si.phi ();
1700           ok = true;
1701
1702           stmt_info = vinfo_for_stmt (phi);
1703           if (dump_enabled_p ())
1704             {
1705               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1706               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1707             }
1708           if (virtual_operand_p (gimple_phi_result (phi)))
1709             continue;
1710
1711           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1712              (i.e., a phi in the tail of the outer-loop).  */
1713           if (! is_loop_header_bb_p (bb))
1714             {
1715               /* FORNOW: we currently don't support the case that these phis
1716                  are not used in the outerloop (unless it is double reduction,
1717                  i.e., this phi is vect_reduction_def), cause this case
1718                  requires to actually do something here.  */
1719               if (STMT_VINFO_LIVE_P (stmt_info)
1720                   && STMT_VINFO_DEF_TYPE (stmt_info)
1721                      != vect_double_reduction_def)
1722                 {
1723                   if (dump_enabled_p ())
1724                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1725                                      "Unsupported loop-closed phi in "
1726                                      "outer-loop.\n");
1727                   return false;
1728                 }
1729
1730               /* If PHI is used in the outer loop, we check that its (single,
1731                  SSA_NAME) operand is defined in the inner loop.  */
1732               if (STMT_VINFO_RELEVANT_P (stmt_info))
1733                 {
1734                   tree phi_op;
1735                   gimple *op_def_stmt;
1736
1737                   if (gimple_phi_num_args (phi) != 1)
1738                     return false;
1739
1740                   phi_op = PHI_ARG_DEF (phi, 0);
1741                   if (TREE_CODE (phi_op) != SSA_NAME)
1742                     return false;
1743
1744                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1745                   if (gimple_nop_p (op_def_stmt)
1746                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1747                       || !vinfo_for_stmt (op_def_stmt))
1748                     return false;
1749                   /* Its relevance must be vect_used_in_outer[_by_reduction].  */
1750                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1751                         != vect_used_in_outer
1752                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1753                            != vect_used_in_outer_by_reduction)
1754                     return false;
1755                 }
1756
1757               continue;
1758             }
1759
1760           gcc_assert (stmt_info);
1761           /* A header phi that is used in scope or live must be an induction.  */
1762           if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1763                || STMT_VINFO_LIVE_P (stmt_info))
1764               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1765             {
1766               /* A scalar-dependence cycle that we don't support.  */
1767               if (dump_enabled_p ())
1768                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769                                  "not vectorized: scalar dependence cycle.\n");
1770               return false;
1771             }
1772           /* Relevant inductions are checked here unless they are pure SLP.  */
1773           if (STMT_VINFO_RELEVANT_P (stmt_info))
1774             {
1775               need_to_vectorize = true;
1776               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
1777                   && ! PURE_SLP_STMT (stmt_info))
1778                 ok = vectorizable_induction (phi, NULL, NULL, NULL);
1779             }
1780           /* Live phis must also be supported as live operations.  */
1781           if (ok && STMT_VINFO_LIVE_P (stmt_info))
1782             ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL);
1783
1784           if (!ok)
1785             {
1786               if (dump_enabled_p ())
1787                 {
1788                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789                                    "not vectorized: relevant phi not "
1790                                    "supported: ");
1791                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1792                 }
1793               return false;
1794             }
1795         }
1796       /* Now the ordinary stmts of the block; clobbers are skipped.  */
1797       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1798            gsi_next (&si))
1799         {
1800           gimple *stmt = gsi_stmt (si);
1801           if (!gimple_clobber_p (stmt)
1802               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1803             return false;
1804         }
1805     } /* bbs */
1806
1807   /* All operations in the loop are either irrelevant (deal with loop
1808      control, or dead), or only used outside the loop and can be moved
1809      out of the loop (e.g. invariants, inductions).  The loop can be
1810      optimized away by scalar optimizations.  We're better off not
1811      touching this loop.  */
1812   if (!need_to_vectorize)
1813     {
1814       if (dump_enabled_p ())
1815         dump_printf_loc (MSG_NOTE, vect_location,
1816                          "All the computation can be taken out of the loop.\n");
1817       if (dump_enabled_p ())
1818         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1819                          "not vectorized: redundant loop. no profit to "
1820                          "vectorize.\n");
1821       return false;
1822     }
1823
1824   return true;
1825 }
1826
1827
1828 /* Function vect_analyze_loop_2.
1829
1830    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1831    for it.  The different analyses will record information in the
1832    loop_vec_info struct.  */
1833 static bool
1834 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1835 {
1836   bool ok;
1837   int max_vf = MAX_VECTORIZATION_FACTOR;
1838   int min_vf = 2;
1839   unsigned int n_stmts = 0;
1840
1841   /* The first group of checks is independent of the vector size.  */
1842   fatal = true;
1843
1844   /* Find all data references in the loop (which correspond to vdefs/vuses)
1845      and analyze their evolution in the loop.  */
1846
1847   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1848
1849   loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1850   if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1851     {
1852       if (dump_enabled_p ())
1853         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854                          "not vectorized: loop nest containing two "
1855                          "or more consecutive inner loops cannot be "
1856                          "vectorized\n");
1857       return false;
1858     }
1859
1860   for (unsigned i = 0; i < loop->num_nodes; i++)
1861     for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1862          !gsi_end_p (gsi); gsi_next (&gsi))
1863       {
1864         gimple *stmt = gsi_stmt (gsi);
1865         if (is_gimple_debug (stmt))
1866           continue;
1867         ++n_stmts;
1868         if (!find_data_references_in_stmt (loop, stmt,
1869                                            &LOOP_VINFO_DATAREFS (loop_vinfo)))
1870           {
1871             if (is_gimple_call (stmt) && loop->safelen)
1872               {
1873                 tree fndecl = gimple_call_fndecl (stmt), op;
1874                 if (fndecl != NULL_TREE)
1875                   {
1876                     cgraph_node *node = cgraph_node::get (fndecl);
1877                     if (node != NULL && node->simd_clones != NULL)
1878                       {
1879                         unsigned int j, n = gimple_call_num_args (stmt);
1880                         for (j = 0; j < n; j++)
1881                           {
1882                             op = gimple_call_arg (stmt, j);
1883                             if (DECL_P (op)
1884                                 || (REFERENCE_CLASS_P (op)
1885                                     && get_base_address (op)))
1886                               break;
1887                           }
1888                         op = gimple_call_lhs (stmt);
1889                         /* Ignore #pragma omp declare simd functions
1890                            if they don't have data references in the
1891                            call stmt itself.  */
1892                         if (j == n
1893                             && !(op
1894                                  && (DECL_P (op)
1895                                      || (REFERENCE_CLASS_P (op)
1896                                          && get_base_address (op)))))
1897                           continue;
1898                       }
1899                   }
1900               }
1901             if (dump_enabled_p ())
1902               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1903                                "not vectorized: loop contains function "
1904                                "calls or data references that cannot "
1905                                "be analyzed\n");
1906             return false;
1907           }
1908       }
1909
1910   /* Analyze the data references and also adjust the minimal
1911      vectorization factor according to the loads and stores.  */
1912
1913   ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1914   if (!ok)
1915     {
1916       if (dump_enabled_p ())
1917         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918                          "bad data references.\n");
1919       return false;
1920     }
1921
1922   /* Classify all cross-iteration scalar data-flow cycles.
1923      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1924   vect_analyze_scalar_cycles (loop_vinfo);
1925
1926   vect_pattern_recog (loop_vinfo);
1927
1928   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1929
1930   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1931      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1932
1933   ok = vect_analyze_data_ref_accesses (loop_vinfo);
1934   if (!ok)
1935     {
1936       if (dump_enabled_p ())
1937         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1938                          "bad data access.\n");
1939       return false;
1940     }
1941
1942   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1943
1944   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1945   if (!ok)
1946     {
1947       if (dump_enabled_p ())
1948         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1949                          "unexpected pattern.\n");
1950       return false;
1951     }
1952
1953   /* While the rest of the analysis below depends on it in some way.  */
1954   fatal = false;
1955
1956   /* Analyze data dependences between the data-refs in the loop
1957      and adjust the maximum vectorization factor according to
1958      the dependences.
1959      FORNOW: fail at the first data dependence that we encounter.  */
1960
1961   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1962   if (!ok
1963       || max_vf < min_vf)
1964     {
1965       if (dump_enabled_p ())
1966             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1967                              "bad data dependence.\n");
1968       return false;
1969     }
1970
1971   ok = vect_determine_vectorization_factor (loop_vinfo);
1972   if (!ok)
1973     {
1974       if (dump_enabled_p ())
1975         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1976                          "can't determine vectorization factor.\n");
1977       return false;
1978     }
1979   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1980     {
1981       if (dump_enabled_p ())
1982         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1983                          "bad data dependence.\n");
1984       return false;
1985     }
1986
1987   /* Compute the scalar iteration cost.  */
1988   vect_compute_single_scalar_iteration_cost (loop_vinfo);
1989
1990   int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1991   HOST_WIDE_INT estimated_niter;
1992   unsigned th;
1993   int min_scalar_loop_bound;
1994
1995   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1996   ok = vect_analyze_slp (loop_vinfo, n_stmts);
1997   if (!ok)
1998     return false;
1999
2000   /* If there are any SLP instances mark them as pure_slp.  */
2001   bool slp = vect_make_slp_decision (loop_vinfo);
2002   if (slp)
2003     {
2004       /* Find stmts that need to be both vectorized and SLPed.  */
2005       vect_detect_hybrid_slp (loop_vinfo);
2006
2007       /* Update the vectorization factor based on the SLP decision.  */
2008       vect_update_vf_for_slp (loop_vinfo);
2009     }
2010
2011   /* This is the point where we can re-start analysis with SLP forced off.  */
2012 start_over:
2013
2014   /* Now the vectorization factor is final.  */
2015   unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2016   gcc_assert (vectorization_factor != 0);
2017
2018   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
2019     dump_printf_loc (MSG_NOTE, vect_location,
2020                      "vectorization_factor = %d, niters = "
2021                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
2022                      LOOP_VINFO_INT_NITERS (loop_vinfo));
2023
2024   HOST_WIDE_INT max_niter
2025     = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2026   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2027        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
2028       || (max_niter != -1
2029           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
2030     {
2031       if (dump_enabled_p ())
2032         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2033                          "not vectorized: iteration count smaller than "
2034                          "vectorization factor.\n");
2035       return false;
2036     }
2037
2038   /* Analyze the alignment of the data-refs in the loop.
2039      Fail if a data reference is found that cannot be vectorized.  */
2040
2041   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2042   if (!ok)
2043     {
2044       if (dump_enabled_p ())
2045         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2046                          "bad data alignment.\n");
2047       return false;
2048     }
2049
2050   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2051      It is important to call pruning after vect_analyze_data_ref_accesses,
2052      since we use grouping information gathered by interleaving analysis.  */
2053   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2054   if (!ok)
2055     return false;
2056
2057   /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2058      vectorization.  */
2059   if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2060     {
2061     /* This pass will decide on using loop versioning and/or loop peeling in
2062        order to enhance the alignment of data references in the loop.  */
2063     ok = vect_enhance_data_refs_alignment (loop_vinfo);
2064     if (!ok)
2065       {
2066         if (dump_enabled_p ())
2067           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2068                            "bad data alignment.\n");
2069         return false;
2070       }
2071     }
2072
2073   if (slp)
2074     {
2075       /* Analyze operations in the SLP instances.  Note this may
2076          remove unsupported SLP instances which makes the above
2077          SLP kind detection invalid.  */
2078       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2079       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2080                                    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2081       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2082         goto again;
2083     }
2084
2085   /* Scan all the remaining operations in the loop that are not subject
2086      to SLP and make sure they are vectorizable.  */
2087   ok = vect_analyze_loop_operations (loop_vinfo);
2088   if (!ok)
2089     {
2090       if (dump_enabled_p ())
2091         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2092                          "bad operation or unsupported loop bound.\n");
2093       return false;
2094     }
2095
2096   /* If epilog loop is required because of data accesses with gaps,
2097      one additional iteration needs to be peeled.  Check if there is
2098      enough iterations for vectorization.  */
2099   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2100       && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2101     {
2102       int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2103       tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2104
2105       if (wi::to_widest (scalar_niters) < vf)
2106         {
2107           if (dump_enabled_p ())
2108             dump_printf_loc (MSG_NOTE, vect_location,
2109                              "loop has no enough iterations to support"
2110                              " peeling for gaps.\n");
2111           return false;
2112         }
2113     }
2114
2115   /* Analyze cost.  Decide if worth while to vectorize.  */
2116   int min_profitable_estimate, min_profitable_iters;
2117   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2118                                       &min_profitable_estimate);
2119
2120   if (min_profitable_iters < 0)
2121     {
2122       if (dump_enabled_p ())
2123         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2124                          "not vectorized: vectorization not profitable.\n");
2125       if (dump_enabled_p ())
2126         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2127                          "not vectorized: vector version will never be "
2128                          "profitable.\n");
2129       goto again;
2130     }
2131
2132   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
2133                             * vectorization_factor) - 1);
2134
2135   /* Use the cost model only if it is more conservative than user specified
2136      threshold.  */
2137   th = (unsigned) min_scalar_loop_bound;
2138   if (min_profitable_iters
2139       && (!min_scalar_loop_bound
2140           || min_profitable_iters > min_scalar_loop_bound))
2141     th = (unsigned) min_profitable_iters;
2142
2143   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2144
2145   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2146       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
2147     {
2148       if (dump_enabled_p ())
2149         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150                          "not vectorized: vectorization not profitable.\n");
2151       if (dump_enabled_p ())
2152         dump_printf_loc (MSG_NOTE, vect_location,
2153                          "not vectorized: iteration count smaller than user "
2154                          "specified loop bound parameter or minimum profitable "
2155                          "iterations (whichever is more conservative).\n");
2156       goto again;
2157     }
2158
2159   estimated_niter
2160     = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2161   if (estimated_niter == -1)
2162     estimated_niter = max_niter;
2163   if (estimated_niter != -1
2164       && ((unsigned HOST_WIDE_INT) estimated_niter
2165           <= MAX (th, (unsigned)min_profitable_estimate)))
2166     {
2167       if (dump_enabled_p ())
2168         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2169                          "not vectorized: estimated iteration count too "
2170                          "small.\n");
2171       if (dump_enabled_p ())
2172         dump_printf_loc (MSG_NOTE, vect_location,
2173                          "not vectorized: estimated iteration count smaller "
2174                          "than specified loop bound parameter or minimum "
2175                          "profitable iterations (whichever is more "
2176                          "conservative).\n");
2177       goto again;
2178     }
2179
2180   /* Decide whether we need to create an epilogue loop to handle
2181      remaining scalar iterations.  */
2182   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
2183         / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2184        * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2185
2186   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2187       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2188     {
2189       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
2190                    - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
2191           < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2192         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2193     }
2194   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2195            || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2196                < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2197                /* In case of versioning, check if the maximum number of
2198                   iterations is greater than th.  If they are identical,
2199                   the epilogue is unnecessary.  */
2200                && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2201                    || (unsigned HOST_WIDE_INT) max_niter > th)))
2202     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2203
2204   /* If an epilogue loop is required make sure we can create one.  */
2205   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2206       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2207     {
2208       if (dump_enabled_p ())
2209         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
2210       if (!vect_can_advance_ivs_p (loop_vinfo)
2211           || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2212                                            single_exit (LOOP_VINFO_LOOP
2213                                                          (loop_vinfo))))
2214         {
2215           if (dump_enabled_p ())
2216             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2217                              "not vectorized: can't create required "
2218                              "epilog loop\n");
2219           goto again;
2220         }
2221     }
2222
2223   /* During peeling, we need to check if number of loop iterations is
2224      enough for both peeled prolog loop and vector loop.  This check
2225      can be merged along with threshold check of loop versioning, so
2226      increase threshold for this case if necessary.  */
2227   if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
2228       && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2229           || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
2230     {
2231       unsigned niters_th;
2232
2233       /* Niters for peeled prolog loop.  */
2234       if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2235         {
2236           struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2237           tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2238
2239           niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1;
2240         }
2241       else
2242         niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2243
2244       /* Niters for at least one iteration of vectorized loop.  */
2245       niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2246       /* One additional iteration because of peeling for gap.  */
2247       if (!LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2248         niters_th++;
2249       if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
2250         LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
2251     }
2252
2253   gcc_assert (vectorization_factor
2254               == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2255
2256   /* Ok to vectorize!  */
2257   return true;
2258
2259 again:
2260   /* Try again with SLP forced off but if we didn't do any SLP there is
2261      no point in re-trying.  */
2262   if (!slp)
2263     return false;
2264
2265   /* If there are reduction chains re-trying will fail anyway.  */
2266   if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2267     return false;
2268
2269   /* Likewise if the grouped loads or stores in the SLP cannot be handled
2270      via interleaving or lane instructions.  */
2271   slp_instance instance;
2272   slp_tree node;
2273   unsigned i, j;
2274   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2275     {
2276       stmt_vec_info vinfo;
2277       vinfo = vinfo_for_stmt
2278           (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2279       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2280         continue;
2281       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2282       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
2283       tree vectype = STMT_VINFO_VECTYPE (vinfo);
2284       if (! vect_store_lanes_supported (vectype, size)
2285           && ! vect_grouped_store_supported (vectype, size))
2286         return false;
2287       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2288         {
2289           vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2290           vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
2291           bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo);
2292           size = STMT_VINFO_GROUP_SIZE (vinfo);
2293           vectype = STMT_VINFO_VECTYPE (vinfo);
2294           if (! vect_load_lanes_supported (vectype, size)
2295               && ! vect_grouped_load_supported (vectype, single_element_p,
2296                                                 size))
2297             return false;
2298         }
2299     }
2300
2301   if (dump_enabled_p ())
2302     dump_printf_loc (MSG_NOTE, vect_location,
2303                      "re-trying with SLP disabled\n");
2304
2305   /* Roll back state appropriately.  No SLP this time.  */
2306   slp = false;
2307   /* Restore vectorization factor as it was without SLP.  */
2308   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2309   /* Free the SLP instances.  */
2310   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2311     vect_free_slp_instance (instance);
2312   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2313   /* Reset SLP type to loop_vect on all stmts.  */
2314   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2315     {
2316       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2317       for (gimple_stmt_iterator si = gsi_start_phis (bb);
2318            !gsi_end_p (si); gsi_next (&si))
2319         {
2320           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2321           STMT_SLP_TYPE (stmt_info) = loop_vect;
2322         }
2323       for (gimple_stmt_iterator si = gsi_start_bb (bb);
2324            !gsi_end_p (si); gsi_next (&si))
2325         {
2326           stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si));
2327           STMT_SLP_TYPE (stmt_info) = loop_vect;
2328           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2329             {
2330               stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2331               STMT_SLP_TYPE (stmt_info) = loop_vect;
2332               for (gimple_stmt_iterator pi
2333                      = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2334                    !gsi_end_p (pi); gsi_next (&pi))
2335                 {
2336                   gimple *pstmt = gsi_stmt (pi);
2337                   STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2338                 }
2339             }
2340         }
2341     }
2342   /* Free optimized alias test DDRS.  */
2343   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2344   /* Reset target cost data.  */
2345   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2346   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2347     = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2348   /* Reset assorted flags.  */
2349   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2350   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2351   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2352
2353   goto start_over;
2354 }
2355
2356 /* Function vect_analyze_loop.
2357
2358    Run the vectorization analyses on LOOP and return the loop_vec_info
2359    describing it, or NULL if LOOP cannot be vectorized.  Analysis is retried
2360    with the other vector sizes the target offers until one succeeds.  If
2361    ORIG_LOOP_VINFO is not NULL epilogue must be vectorized.  */
2362 loop_vec_info
2363 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
2364 {
2365   /* A current_vector_size of zero makes the first attempt autodetect
2366      the vector size.  */
2367   current_vector_size = 0;
2368   unsigned int remaining_sizes
2369     = targetm.vectorize.autovectorize_vector_sizes ();
2370
2371   if (dump_enabled_p ())
2372     dump_printf_loc (MSG_NOTE, vect_location,
2373                      "===== analyze_loop_nest =====\n");
2374
2375   /* Give up when the enclosing loop is already marked vectorizable.  */
2376   struct loop *outer = loop_outer (loop);
2377   loop_vec_info outer_vinfo = outer ? loop_vec_info_for_loop (outer) : NULL;
2378   if (outer_vinfo && LOOP_VINFO_VECTORIZABLE_P (outer_vinfo))
2379     {
2380       if (dump_enabled_p ())
2381         dump_printf_loc (MSG_NOTE, vect_location,
2382                          "outer-loop already vectorized.\n");
2383       return NULL;
2384     }
2385
2386   for (;;)
2387     {
2388       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
2389       loop_vec_info candidate = vect_analyze_loop_form (loop);
2390       if (candidate == NULL)
2391         {
2392           if (dump_enabled_p ())
2393             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394                              "bad loop form.\n");
2395           return NULL;
2396         }
2397
2398       if (orig_loop_vinfo != NULL)
2399         LOOP_VINFO_ORIG_LOOP_INFO (candidate) = orig_loop_vinfo;
2400
2401       bool fatal = false;
2402       if (vect_analyze_loop_2 (candidate, fatal))
2403         {
2404           /* Success: flag the loop as vectorizable and hand it back.  */
2405           LOOP_VINFO_VECTORIZABLE_P (candidate) = 1;
2406           return candidate;
2407         }
2408
2409       /* This attempt failed; discard it and rule out the size it used.  */
2410       destroy_loop_vec_info (candidate, true);
2411       remaining_sizes &= ~current_vector_size;
2412
2413       /* Stop if the analysis left FATAL set, if no size is left to try,
2414          or if current_vector_size is still zero.  */
2415       if (fatal || remaining_sizes == 0 || current_vector_size == 0)
2416         return NULL;
2417
2418       /* Retry with the biggest size still available.  */
2419       current_vector_size = 1 << floor_log2 (remaining_sizes);
2420       if (dump_enabled_p ())
2421         dump_printf_loc (MSG_NOTE, vect_location,
2422                          "***** Re-trying analysis with "
2423                          "vector size %d\n", current_vector_size);
2424     }
2425 }
2426
2427
2428 /* Function reduction_code_for_scalar_code
2429
2430    Input:
2431    CODE - scalar tree_code of a reduction operation.
2432
2433    Output:
2434    REDUC_CODE - set to the tree code that reduces a vector of partial
2435       results into a single scalar result, or to ERROR_MARK when CODE is a
2436       supported reduction operation that has no such tree code.
2437
2438    Return FALSE, leaving REDUC_CODE unset, if CODE is not handled here.  */
2439
2440 static bool
2441 reduction_code_for_scalar_code (enum tree_code code,
2442                                 enum tree_code *reduc_code)
2443 {
2444   enum tree_code vec_code;
2445   switch (code)
2446     {
2447     case MAX_EXPR:
2448       vec_code = REDUC_MAX_EXPR;
2449       break;
2450     case MIN_EXPR:
2451       vec_code = REDUC_MIN_EXPR;
2452       break;
2453     case PLUS_EXPR:
2454       vec_code = REDUC_PLUS_EXPR;
2455       break;
2456     case MULT_EXPR:
2457     case MINUS_EXPR:
2458     case BIT_IOR_EXPR:
2459     case BIT_XOR_EXPR:
2460     case BIT_AND_EXPR:
2461       /* Supported, but without a reduction tree code of its own.  */
2462       vec_code = ERROR_MARK;
2463       break;
2464     default:
2465       return false;
2466     }
2467
2468   *reduc_code = vec_code;
2469   return true;
2470 }
2471
2472
2473 /* Dump helper for vect_is_simple_reduction below: print MSG at vect_location
2474    followed by GIMPLE statement STMT, both with dump kind MSG_TYPE.  */
2475
2476 static void
2477 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
2478 {
2479   dump_printf_loc (msg_type, vect_location, "%s", msg);  /* The message.  */
2480   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);  /* STMT in slim form.  */
2481 }
2482
2483
2484 /* Detect SLP reduction of the form:
2485
2486    #a1 = phi <a5, a0>
2487    a2 = operation (a1)
2488    a3 = operation (a2)
2489    a4 = operation (a3)
2490    a5 = operation (a4)
2491
2492    #a = phi <a5>
2493
2494    PHI is the reduction phi node (#a1 = phi <a5, a0> above) and must be in
2495    the loop of LOOP_INFO.  FIRST_STMT is the first reduction stmt in the
2496    chain (a2 = operation (a1)); every chain stmt must use its rhs code.
2497    Return TRUE if a chain of two or more stmts was detected; the chain is
2498    then recorded in LOOP_VINFO_REDUCTION_CHAINS with its group size set.  */
2499
2500 static bool
2501 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2502                        gimple *first_stmt)
2503 {
2504   struct loop *loop = (gimple_bb (phi))->loop_father;
2505   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2506   enum tree_code code;
2507   gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2508   stmt_vec_info use_stmt_info, current_stmt_info;
2509   tree lhs;
2510   imm_use_iterator imm_iter;
2511   use_operand_p use_p;
2512   int nloop_uses, size = 0, n_out_of_loop_uses;
2513   bool found = false;
2514   /* Reduction chains are only looked for in the loop being vectorized.  */
2515   if (loop != vect_loop)
2516     return false;
2517   /* Follow the in-loop uses of PHI's result until PHI is reached again.  */
2518   lhs = PHI_RESULT (phi);
2519   code = gimple_assign_rhs_code (first_stmt);
2520   while (1)
2521     {
2522       nloop_uses = 0;
2523       n_out_of_loop_uses = 0;
2524       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2525         {
2526           gimple *use_stmt = USE_STMT (use_p);
2527           if (is_gimple_debug (use_stmt))
2528             continue;
2529
2530           /* Check if we got back to the reduction phi.  */
2531           if (use_stmt == phi)
2532             {
2533               loop_use_stmt = use_stmt;
2534               found = true;
2535               break;
2536             }
2537
2538           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2539             {
2540               loop_use_stmt = use_stmt;
2541               nloop_uses++;
2542             }
2543            else
2544              n_out_of_loop_uses++;
2545
2546            /* There can be either a single use in the loop or two uses in
2547               phi nodes.  */
2548            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2549              return false;
2550         }
2551
2552       if (found)
2553         break;
2554
2555       /* We reached a statement with no loop uses.  */
2556       if (nloop_uses == 0)
2557         return false;
2558
2559       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2560       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2561         return false;
2562       /* Every chain stmt must be an in-loop assignment whose code is CODE.  */
2563       if (!is_gimple_assign (loop_use_stmt)
2564           || code != gimple_assign_rhs_code (loop_use_stmt)
2565           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2566         return false;
2567
2568       /* Insert LOOP_USE_STMT into the reduction chain.  */
2569       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2570       if (current_stmt)
2571         {
2572           current_stmt_info = vinfo_for_stmt (current_stmt);
2573           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2574           GROUP_FIRST_ELEMENT (use_stmt_info)
2575             = GROUP_FIRST_ELEMENT (current_stmt_info);
2576         }
2577       else
2578         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2579       /* NOTE(review): links set above are not undone on later `return false'.  */
2580       lhs = gimple_assign_lhs (loop_use_stmt);
2581       current_stmt = loop_use_stmt;
2582       size++;
2583    }
2584   /* We must be back at PHI after walking at least two chain stmts.  */
2585   if (!found || loop_use_stmt != phi || size < 2)
2586     return false;
2587
2588   /* Swap the operands, if needed, to make the reduction operand be the second
2589      operand.  */
2590   lhs = PHI_RESULT (phi);
2591   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2592   while (next_stmt)
2593     {
2594       if (gimple_assign_rhs2 (next_stmt) == lhs)  /* Already second.  */
2595         {
2596           tree op = gimple_assign_rhs1 (next_stmt);
2597           gimple *def_stmt = NULL;
2598
2599           if (TREE_CODE (op) == SSA_NAME)
2600             def_stmt = SSA_NAME_DEF_STMT (op);
2601
2602           /* Check that the other def is either defined in the loop
2603              ("vect_internal_def"), or it's an induction (defined by a
2604              loop-header phi-node).  */
2605           if (def_stmt
2606               && gimple_bb (def_stmt)
2607               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2608               && (is_gimple_assign (def_stmt)
2609                   || is_gimple_call (def_stmt)
2610                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2611                            == vect_induction_def
2612                   || (gimple_code (def_stmt) == GIMPLE_PHI
2613                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2614                                   == vect_internal_def
2615                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2616             {
2617               lhs = gimple_assign_lhs (next_stmt);
2618               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2619               continue;
2620             }
2621
2622           return false;
2623         }
2624       else
2625         {
2626           tree op = gimple_assign_rhs2 (next_stmt);
2627           gimple *def_stmt = NULL;
2628
2629           if (TREE_CODE (op) == SSA_NAME)
2630             def_stmt = SSA_NAME_DEF_STMT (op);
2631
2632           /* Check that the other def is either defined in the loop
2633             ("vect_internal_def"), or it's an induction (defined by a
2634             loop-header phi-node).  */
2635           if (def_stmt
2636               && gimple_bb (def_stmt)
2637               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2638               && (is_gimple_assign (def_stmt)
2639                   || is_gimple_call (def_stmt)
2640                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2641                               == vect_induction_def
2642                   || (gimple_code (def_stmt) == GIMPLE_PHI
2643                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2644                                   == vect_internal_def
2645                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2646             {
2647               if (dump_enabled_p ())
2648                 {
2649                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2650                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2651                 }
2652
2653               swap_ssa_operands (next_stmt,
2654                                  gimple_assign_rhs1_ptr (next_stmt),
2655                                  gimple_assign_rhs2_ptr (next_stmt));
2656               update_stmt (next_stmt);
2657               /* Record when the swap put a constant into the first operand.  */
2658               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2659                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2660             }
2661           else
2662             return false;
2663         }
2664       /* The next chain stmt takes this stmt's lhs as its reduction operand.  */
2665       lhs = gimple_assign_lhs (next_stmt);
2666       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2667     }
2668
2669   /* Save the chain for further analysis in SLP detection.  */
2670   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2671   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2672   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2673
2674   return true;
2675 }
2676
2677
2678 /* Function vect_is_simple_reduction
2679
2680    (1) Detect a cross-iteration def-use cycle that represents a simple
2681    reduction computation.  We look for the following pattern:
2682
2683    loop_header:
2684      a1 = phi < a0, a2 >
2685      a3 = ...
2686      a2 = operation (a3, a1)
2687
2688    or
2689
2690    a3 = ...
2691    loop_header:
2692      a1 = phi < a0, a2 >
2693      a2 = operation (a3, a1)
2694
2695    such that:
2696    1. operation is commutative and associative and it is safe to
2697       change the order of the computation
2698    2. no uses for a2 in the loop (a2 is used out of the loop)
2699    3. no uses of a1 in the loop besides the reduction operation
2700    4. no uses of a1 outside the loop.
2701
2702    Conditions 1,4 are tested here.
2703    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2704
2705    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2706    nested cycles.
2707
2708    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2709    reductions:
2710
2711      a1 = phi < a0, a2 >
2712      inner loop (def of a3)
2713      a2 = phi < a3 >
2714
2715    (4) Detect condition expressions, ie:
2716      for (int i = 0; i < N; i++)
2717        if (a[i] < val)
2718         ret_val = a[i];
2719
2720    Return the stmt defining PHI's latch argument, or NULL if nothing matched.  */
2721 /* Sets *DOUBLE_REDUC in case (3); *V_REDUC_TYPE is COND_REDUCTION in case (4).  */
2722 static gimple *
2723 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2724                           bool *double_reduc,
2725                           bool need_wrapping_integral_overflow,
2726                           enum vect_reduction_type *v_reduc_type)
2727 {
2728   struct loop *loop = (gimple_bb (phi))->loop_father;
2729   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2730   edge latch_e = loop_latch_edge (loop);
2731   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2732   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
2733   enum tree_code orig_code, code;
2734   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2735   tree type;
2736   int nloop_uses;
2737   tree name;
2738   imm_use_iterator imm_iter;
2739   use_operand_p use_p;
2740   bool phi_def;
2741   /* Defaults for the outputs; overridden below for special forms.  */
2742   *double_reduc = false;
2743   *v_reduc_type = TREE_CODE_REDUCTION;
2744
2745   /* Validity is only checked when LOOP is VECT_LOOP itself, not a nested loop.  */
2746   bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
2747   gcc_assert ((check_reduction && loop == vect_loop)
2748               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2749
2750   name = PHI_RESULT (phi);
2751   /* ???  If there are no uses of the PHI result the inner loop reduction
2752      won't be detected as possibly double-reduction by vectorizable_reduction
2753      because that tries to walk the PHI arg from the preheader edge which
2754      can be constant.  See PR60382.  */
2755   if (has_zero_uses (name))
2756     return NULL;
2757   nloop_uses = 0;  /* PHI's result needs a single use, inside LOOP.  */
2758   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2759     {
2760       gimple *use_stmt = USE_STMT (use_p);
2761       if (is_gimple_debug (use_stmt))
2762         continue;
2763
2764       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2765         {
2766           if (dump_enabled_p ())
2767             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2768                              "intermediate value used outside loop.\n");
2769
2770           return NULL;
2771         }
2772
2773       nloop_uses++;
2774       if (nloop_uses > 1)
2775         {
2776           if (dump_enabled_p ())
2777             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2778                              "reduction used in loop.\n");
2779           return NULL;
2780         }
2781
2782       phi_use_stmt = use_stmt;
2783     }
2784   /* The value on the latch edge must be an SSA name with a defining stmt.  */
2785   if (TREE_CODE (loop_arg) != SSA_NAME)
2786     {
2787       if (dump_enabled_p ())
2788         {
2789           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2790                            "reduction: not ssa_name: ");
2791           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2792           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2793         }
2794       return NULL;
2795     }
2796
2797   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2798   if (!def_stmt)
2799     {
2800       if (dump_enabled_p ())
2801         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2802                          "reduction: no def_stmt.\n");
2803       return NULL;
2804     }
2805   /* Only an assignment or a PHI is accepted as the defining stmt.  */
2806   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2807     {
2808       if (dump_enabled_p ())
2809         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2810       return NULL;
2811     }
2812   /* NAME becomes the value defined by DEF_STMT.  */
2813   if (is_gimple_assign (def_stmt))
2814     {
2815       name = gimple_assign_lhs (def_stmt);
2816       phi_def = false;
2817     }
2818   else
2819     {
2820       name = PHI_RESULT (def_stmt);
2821       phi_def = true;
2822     }
2823   /* That value may have at most one non-debug use inside LOOP.  */
2824   nloop_uses = 0;
2825   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2826     {
2827       gimple *use_stmt = USE_STMT (use_p);
2828       if (is_gimple_debug (use_stmt))
2829         continue;
2830       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2831         nloop_uses++;
2832       if (nloop_uses > 1)
2833         {
2834           if (dump_enabled_p ())
2835             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2836                              "reduction used in loop.\n");
2837           return NULL;
2838         }
2839     }
2840
2841   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2842      defined in the inner loop.  */
2843   if (phi_def)
2844     {
2845       op1 = PHI_ARG_DEF (def_stmt, 0);
2846
2847       if (gimple_phi_num_args (def_stmt) != 1
2848           || TREE_CODE (op1) != SSA_NAME)
2849         {
2850           if (dump_enabled_p ())
2851             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2852                              "unsupported phi node definition.\n");
2853
2854           return NULL;
2855         }
2856
2857       def1 = SSA_NAME_DEF_STMT (op1);
2858       if (gimple_bb (def1)
2859           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2860           && loop->inner
2861           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2862           && is_gimple_assign (def1)
2863           && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2864         {
2865           if (dump_enabled_p ())
2866             report_vect_op (MSG_NOTE, def_stmt,
2867                             "detected double reduction: ");
2868
2869           *double_reduc = true;
2870           return def_stmt;
2871         }
2872
2873       return NULL;
2874     }
2875   /* DEF_STMT is an assignment from here on.  */
2876   code = orig_code = gimple_assign_rhs_code (def_stmt);
2877
2878   /* We can handle "res -= x[i]", which is non-associative by
2879      simply rewriting this into "res += -x[i]".  Avoid changing
2880      gimple instruction for the first simple tests and only do this
2881      if we're allowed to change code at all.  */
2882   if (code == MINUS_EXPR
2883       && (op1 = gimple_assign_rhs1 (def_stmt))
2884       && TREE_CODE (op1) == SSA_NAME
2885       && SSA_NAME_DEF_STMT (op1) == phi)
2886     code = PLUS_EXPR;
2887   /* COND_EXPR is let through; any other code must commute and associate.  */
2888   if (code == COND_EXPR)
2889     {
2890       if (check_reduction)
2891         *v_reduc_type = COND_REDUCTION;
2892     }
2893   else if (!commutative_tree_code (code) || !associative_tree_code (code))
2894     {
2895       if (dump_enabled_p ())
2896         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2897                         "reduction: not commutative/associative: ");
2898       return NULL;
2899     }
2900   /* Fetch the operands; a COND_EXPR also yields its condition in OP3/OP4.  */
2901   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2902     {
2903       if (code != COND_EXPR)
2904         {
2905           if (dump_enabled_p ())
2906             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2907                             "reduction: not binary operation: ");
2908
2909           return NULL;
2910         }
2911
2912       op3 = gimple_assign_rhs1 (def_stmt);
2913       if (COMPARISON_CLASS_P (op3))
2914         {
2915           op4 = TREE_OPERAND (op3, 1);
2916           op3 = TREE_OPERAND (op3, 0);
2917         }
2918
2919       op1 = gimple_assign_rhs2 (def_stmt);
2920       op2 = gimple_assign_rhs3 (def_stmt);
2921
2922       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2923         {
2924           if (dump_enabled_p ())
2925             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2926                             "reduction: uses not ssa_names: ");
2927
2928           return NULL;
2929         }
2930     }
2931   else
2932     {
2933       op1 = gimple_assign_rhs1 (def_stmt);
2934       op2 = gimple_assign_rhs2 (def_stmt);
2935
2936       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2937         {
2938           if (dump_enabled_p ())
2939             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2940                             "reduction: uses not ssa_names: ");
2941
2942           return NULL;
2943         }
2944    }
2945   /* Every SSA-name operand must be type-compatible with the result.  */
2946   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2947   if ((TREE_CODE (op1) == SSA_NAME
2948        && !types_compatible_p (type,TREE_TYPE (op1)))
2949       || (TREE_CODE (op2) == SSA_NAME
2950           && !types_compatible_p (type, TREE_TYPE (op2)))
2951       || (op3 && TREE_CODE (op3) == SSA_NAME
2952           && !types_compatible_p (type, TREE_TYPE (op3)))
2953       || (op4 && TREE_CODE (op4) == SSA_NAME
2954           && !types_compatible_p (type, TREE_TYPE (op4))))
2955     {
2956       if (dump_enabled_p ())
2957         {
2958           dump_printf_loc (MSG_NOTE, vect_location,
2959                            "reduction: multiple types: operation type: ");
2960           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2961           dump_printf (MSG_NOTE, ", operands types: ");
2962           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2963                              TREE_TYPE (op1));
2964           dump_printf (MSG_NOTE, ",");
2965           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2966                              TREE_TYPE (op2));
2967           if (op3)
2968             {
2969               dump_printf (MSG_NOTE, ",");
2970               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2971                                  TREE_TYPE (op3));
2972             }
2973
2974           if (op4)
2975             {
2976               dump_printf (MSG_NOTE, ",");
2977               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2978                                  TREE_TYPE (op4));
2979             }
2980           dump_printf (MSG_NOTE, "\n");
2981         }
2982
2983       return NULL;
2984     }
2985
2986   /* Check that it's ok to change the order of the computation.
2987      Generally, when vectorizing a reduction we change the order of the
2988      computation.  This may change the behavior of the program in some
2989      cases, so we need to check that this is ok.  One exception is when
2990      vectorizing an outer-loop: the inner-loop is executed sequentially,
2991      and therefore vectorizing reductions in the inner-loop during
2992      outer-loop vectorization is safe.  */
2993
2994   if (*v_reduc_type != COND_REDUCTION
2995       && check_reduction)
2996     {
2997       /* CHECKME: check for !flag_finite_math_only too?  */
2998       if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math)
2999         {
3000           /* Changing the order of operations changes the semantics.  */
3001           if (dump_enabled_p ())
3002             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3003                         "reduction: unsafe fp math optimization: ");
3004           return NULL;
3005         }
3006       else if (INTEGRAL_TYPE_P (type))
3007         {
3008           if (!operation_no_trapping_overflow (type, code))
3009             {
3010               /* Changing the order of operations changes the semantics.  */
3011               if (dump_enabled_p ())
3012                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3013                                 "reduction: unsafe int math optimization"
3014                                 " (overflow traps): ");
3015               return NULL;
3016             }
3017           if (need_wrapping_integral_overflow
3018               && !TYPE_OVERFLOW_WRAPS (type)
3019               && operation_can_overflow (code))
3020             {
3021               /* Changing the order of operations changes the semantics.  */
3022               if (dump_enabled_p ())
3023                 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3024                                 "reduction: unsafe int math optimization"
3025                                 " (overflow doesn't wrap): ");
3026               return NULL;
3027             }
3028         }
3029       else if (SAT_FIXED_POINT_TYPE_P (type))
3030         {
3031           /* Changing the order of operations changes the semantics.  */
3032           if (dump_enabled_p ())
3033           report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3034                           "reduction: unsafe fixed-point math optimization: ");
3035           return NULL;
3036         }
3037     }
3038
3039   /* Reduction is safe. We're dealing with one of the following:
3040      1) integer arithmetic and no trapv
3041      2) floating point arithmetic, and special flags permit this optimization
3042      3) nested cycle (i.e., outer loop vectorization).  */
3043   if (TREE_CODE (op1) == SSA_NAME)
3044     def1 = SSA_NAME_DEF_STMT (op1);
3045
3046   if (TREE_CODE (op2) == SSA_NAME)
3047     def2 = SSA_NAME_DEF_STMT (op2);
3048
3049   if (code != COND_EXPR
3050       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
3051     {
3052       if (dump_enabled_p ())
3053         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
3054       return NULL;
3055     }
3056
3057   /* Check that one def is the reduction def, defined by PHI,
3058      the other def is either defined in the loop ("vect_internal_def"),
3059      or it's an induction (defined by a loop-header phi-node).  */
3060
3061   if (def2 && def2 == phi
3062       && (code == COND_EXPR
3063           || !def1 || gimple_nop_p (def1)
3064           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
3065           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
3066               && (is_gimple_assign (def1)
3067                   || is_gimple_call (def1)
3068                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3069                       == vect_induction_def
3070                   || (gimple_code (def1) == GIMPLE_PHI
3071                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3072                           == vect_internal_def
3073                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
3074     {
3075       if (dump_enabled_p ())
3076         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3077       return def_stmt;
3078     }
3079   /* The same test with the roles of the two operands exchanged.  */
3080   if (def1 && def1 == phi
3081       && (code == COND_EXPR
3082           || !def2 || gimple_nop_p (def2)
3083           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
3084           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
3085               && (is_gimple_assign (def2)
3086                   || is_gimple_call (def2)
3087                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3088                        == vect_induction_def
3089                   || (gimple_code (def2) == GIMPLE_PHI
3090                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3091                            == vect_internal_def
3092                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
3093     {
3094       if (check_reduction && orig_code != MINUS_EXPR)  /* No swap for MINUS.  */
3095         {
3096           /* Check if we can swap operands (just for simplicity - so that
3097              the rest of the code can assume that the reduction variable
3098              is always the last (second) argument).  */
3099           if (code == COND_EXPR)
3100             {
3101               /* Swap cond_expr by inverting the condition.  */
3102               tree cond_expr = gimple_assign_rhs1 (def_stmt);
3103               enum tree_code invert_code = ERROR_MARK;
3104               enum tree_code cond_code = TREE_CODE (cond_expr);
3105
3106               if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
3107                 {
3108                   bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
3109                   invert_code = invert_tree_comparison (cond_code, honor_nans);
3110                 }
3111               if (invert_code != ERROR_MARK)
3112                 {
3113                   TREE_SET_CODE (cond_expr, invert_code);
3114                   swap_ssa_operands (def_stmt,
3115                                      gimple_assign_rhs2_ptr (def_stmt),
3116                                      gimple_assign_rhs3_ptr (def_stmt));
3117                 }
3118               else
3119                 {
3120                   if (dump_enabled_p ())
3121                     report_vect_op (MSG_NOTE, def_stmt,
3122                                     "detected reduction: cannot swap operands "
3123                                     "for cond_expr");
3124                   return NULL;
3125                 }
3126             }
3127           else
3128             swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
3129                                gimple_assign_rhs2_ptr (def_stmt));
3130
3131           if (dump_enabled_p ())
3132             report_vect_op (MSG_NOTE, def_stmt,
3133                             "detected reduction: need to swap operands: ");
3134
3135           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
3136             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
3137         }
3138       else
3139         {
3140           if (dump_enabled_p ())
3141             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3142         }
3143
3144       return def_stmt;
3145     }
3146
3147   /* Try to find SLP reduction chain.  */
3148   if (check_reduction && code != COND_EXPR
3149       && vect_is_slp_reduction (loop_info, phi, def_stmt))
3150     {
3151       if (dump_enabled_p ())
3152         report_vect_op (MSG_NOTE, def_stmt,
3153                         "reduction: detected reduction chain: ");
3154
3155       return def_stmt;
3156     }
3157   /* None of the supported forms matched.  */
3158   if (dump_enabled_p ())
3159     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3160                     "reduction: unknown pattern: ");
3161
3162   return NULL;
3163 }
3164
3165 /* Wrapper around vect_is_simple_reduction, which will modify code
3166    in-place if it enables detection of more reductions.  Arguments
3167    as there.  */
3168
3169 gimple *
3170 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
3171                              bool *double_reduc,
3172                              bool need_wrapping_integral_overflow)
3173 {
3174   enum vect_reduction_type v_reduc_type;
3175   gimple *def = vect_is_simple_reduction (loop_info, phi, double_reduc,
3176                                           need_wrapping_integral_overflow,
3177                                           &v_reduc_type);
3178   if (def)
3179     {
3180       stmt_vec_info reduc_def_info = vinfo_for_stmt (phi);
3181       STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type;
3182       STMT_VINFO_REDUC_DEF (reduc_def_info) = def;
3183     }
3184   return def;
3185 }
3186
3187 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
3188 int
3189 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3190                              int *peel_iters_epilogue,
3191                              stmt_vector_for_cost *scalar_cost_vec,
3192                              stmt_vector_for_cost *prologue_cost_vec,
3193                              stmt_vector_for_cost *epilogue_cost_vec)
3194 {
3195   int retval = 0;
3196   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3197
3198   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3199     {
3200       *peel_iters_epilogue = vf/2;
3201       if (dump_enabled_p ())
3202         dump_printf_loc (MSG_NOTE, vect_location,
3203                          "cost model: epilogue peel iters set to vf/2 "
3204                          "because loop iterations are unknown .\n");
3205
3206       /* If peeled iterations are known but number of scalar loop
3207          iterations are unknown, count a taken branch per peeled loop.  */
3208       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3209                                  NULL, 0, vect_prologue);
3210       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
3211                                  NULL, 0, vect_epilogue);
3212     }
3213   else
3214     {
3215       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3216       peel_iters_prologue = niters < peel_iters_prologue ?
3217                             niters : peel_iters_prologue;
3218       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
3219       /* If we need to peel for gaps, but no peeling is required, we have to
3220          peel VF iterations.  */
3221       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3222         *peel_iters_epilogue = vf;
3223     }
3224
3225   stmt_info_for_cost *si;
3226   int j;
3227   if (peel_iters_prologue)
3228     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3229         {
3230           stmt_vec_info stmt_info
3231             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3232           retval += record_stmt_cost (prologue_cost_vec,
3233                                       si->count * peel_iters_prologue,
3234                                       si->kind, stmt_info, si->misalign,
3235                                       vect_prologue);
3236         }
3237   if (*peel_iters_epilogue)
3238     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3239         {
3240           stmt_vec_info stmt_info
3241             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3242           retval += record_stmt_cost (epilogue_cost_vec,
3243                                       si->count * *peel_iters_epilogue,
3244                                       si->kind, stmt_info, si->misalign,
3245                                       vect_epilogue);
3246         }
3247
3248   return retval;
3249 }
3250
3251 /* Function vect_estimate_min_profitable_iters
3252
3253    Return the number of iterations required for the vector version of the
3254    loop to be profitable relative to the cost of the scalar version of the
3255    loop.
3256
3257    *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold
3258    of iterations for vectorization.  -1 value means loop vectorization
3259    is not profitable.  This returned value may be used for dynamic
3260    profitability check.
3261
3262    *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used
3263    for static check against estimated number of iterations.  */
3264
3265 static void
3266 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
3267                                     int *ret_min_profitable_niters,
3268                                     int *ret_min_profitable_estimate)
3269 {
3270   int min_profitable_iters;
3271   int min_profitable_estimate;
3272   int peel_iters_prologue;
3273   int peel_iters_epilogue;
3274   unsigned vec_inside_cost = 0;
3275   int vec_outside_cost = 0;
3276   unsigned vec_prologue_cost = 0;
3277   unsigned vec_epilogue_cost = 0;
3278   int scalar_single_iter_cost = 0;
3279   int scalar_outside_cost = 0;
3280   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3281   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3282   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3283
3284   /* Cost model disabled.  */
3285   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3286     {
3287       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
3288       *ret_min_profitable_niters = 0;
3289       *ret_min_profitable_estimate = 0;
3290       return;
3291     }
3292
3293   /* Requires loop versioning tests to handle misalignment.  */
3294   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3295     {
3296       /*  FIXME: Make cost depend on complexity of individual check.  */
3297       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
3298       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3299                             vect_prologue);
3300       dump_printf (MSG_NOTE,
3301                    "cost model: Adding cost of checks for loop "
3302                    "versioning to treat misalignment.\n");
3303     }
3304
3305   /* Requires loop versioning with alias checks.  */
3306   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3307     {
3308       /*  FIXME: Make cost depend on complexity of individual check.  */
3309       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
3310       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
3311                             vect_prologue);
3312       dump_printf (MSG_NOTE,
3313                    "cost model: Adding cost of checks for loop "
3314                    "versioning aliasing.\n");
3315     }
3316
3317   /* Requires loop versioning with niter checks.  */
3318   if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
3319     {
3320       /*  FIXME: Make cost depend on complexity of individual check.  */
3321       (void) add_stmt_cost (target_cost_data, 1, vector_stmt, NULL, 0,
3322                             vect_prologue);
3323       dump_printf (MSG_NOTE,
3324                    "cost model: Adding cost of checks for loop "
3325                    "versioning niters.\n");
3326     }
3327
3328   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3329     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
3330                           vect_prologue);
3331
3332   /* Count statements in scalar loop.  Using this as scalar cost for a single
3333      iteration for now.
3334
3335      TODO: Add outer loop support.
3336
3337      TODO: Consider assigning different costs to different scalar
3338      statements.  */
3339
3340   scalar_single_iter_cost
3341     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3342
3343   /* Add additional cost for the peeled instructions in prologue and epilogue
3344      loop.
3345
3346      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3347      at compile-time - we assume it's vf/2 (the worst would be vf-1).
3348
3349      TODO: Build an expression that represents peel_iters for prologue and
3350      epilogue to be used in a run-time test.  */
3351
3352   if (npeel  < 0)
3353     {
3354       peel_iters_prologue = vf/2;
3355       dump_printf (MSG_NOTE, "cost model: "
3356                    "prologue peel iters set to vf/2.\n");
3357
3358       /* If peeling for alignment is unknown, loop bound of main loop becomes
3359          unknown.  */
3360       peel_iters_epilogue = vf/2;
3361       dump_printf (MSG_NOTE, "cost model: "
3362                    "epilogue peel iters set to vf/2 because "
3363                    "peeling for alignment is unknown.\n");
3364
3365       /* If peeled iterations are unknown, count a taken branch and a not taken
3366          branch per peeled loop. Even if scalar loop iterations are known,
3367          vector iterations are not known since peeled prologue iterations are
3368          not known. Hence guards remain the same.  */
3369       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3370                             NULL, 0, vect_prologue);
3371       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3372                             NULL, 0, vect_prologue);
3373       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
3374                             NULL, 0, vect_epilogue);
3375       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
3376                             NULL, 0, vect_epilogue);
3377       stmt_info_for_cost *si;
3378       int j;
3379       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3380         {
3381           struct _stmt_vec_info *stmt_info
3382             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3383           (void) add_stmt_cost (target_cost_data,
3384                                 si->count * peel_iters_prologue,
3385                                 si->kind, stmt_info, si->misalign,
3386                                 vect_prologue);
3387           (void) add_stmt_cost (target_cost_data,
3388                                 si->count * peel_iters_epilogue,
3389                                 si->kind, stmt_info, si->misalign,
3390                                 vect_epilogue);
3391         }
3392     }
3393   else
3394     {
3395       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
3396       stmt_info_for_cost *si;
3397       int j;
3398       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3399
3400       prologue_cost_vec.create (2);
3401       epilogue_cost_vec.create (2);
3402       peel_iters_prologue = npeel;
3403
3404       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
3405                                           &peel_iters_epilogue,
3406                                           &LOOP_VINFO_SCALAR_ITERATION_COST
3407                                             (loop_vinfo),
3408                                           &prologue_cost_vec,
3409                                           &epilogue_cost_vec);
3410
3411       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3412         {
3413           struct _stmt_vec_info *stmt_info
3414             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3415           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3416                                 si->misalign, vect_prologue);
3417         }
3418
3419       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3420         {
3421           struct _stmt_vec_info *stmt_info
3422             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3423           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3424                                 si->misalign, vect_epilogue);
3425         }
3426
3427       prologue_cost_vec.release ();
3428       epilogue_cost_vec.release ();
3429     }
3430
3431   /* FORNOW: The scalar outside cost is incremented in one of the
3432      following ways:
3433
3434      1. The vectorizer checks for alignment and aliasing and generates
3435      a condition that allows dynamic vectorization.  A cost model
3436      check is ANDED with the versioning condition.  Hence scalar code
3437      path now has the added cost of the versioning check.
3438
3439        if (cost > th & versioning_check)
3440          jmp to vector code
3441
3442      Hence run-time scalar is incremented by not-taken branch cost.
3443
3444      2. The vectorizer then checks if a prologue is required.  If the
3445      cost model check was not done before during versioning, it has to
3446      be done before the prologue check.
3447
3448        if (cost <= th)
3449          prologue = scalar_iters
3450        if (prologue == 0)
3451          jmp to vector code
3452        else
3453          execute prologue
3454        if (prologue == num_iters)
3455          go to exit
3456
3457      Hence the run-time scalar cost is incremented by a taken branch,
3458      plus a not-taken branch, plus a taken branch cost.
3459
3460      3. The vectorizer then checks if an epilogue is required.  If the
3461      cost model check was not done before during prologue check, it
3462      has to be done with the epilogue check.
3463
3464        if (prologue == 0)
3465          jmp to vector code
3466        else
3467          execute prologue
3468        if (prologue == num_iters)
3469          go to exit
3470        vector code:
3471          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3472            jmp to epilogue
3473
3474      Hence the run-time scalar cost should be incremented by 2 taken
3475      branches.
3476
3477      TODO: The back end may reorder the BBS's differently and reverse
3478      conditions/branch directions.  Change the estimates below to
3479      something more reasonable.  */
3480
3481   /* If the number of iterations is known and we do not do versioning, we can
3482      decide whether to vectorize at compile time.  Hence the scalar version
3483      do not carry cost model guard costs.  */
3484   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3485       || LOOP_REQUIRES_VERSIONING (loop_vinfo))
3486     {
3487       /* Cost model check occurs at versioning.  */
3488       if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
3489         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3490       else
3491         {
3492           /* Cost model check occurs at prologue generation.  */
3493           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3494             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3495               + vect_get_stmt_cost (cond_branch_not_taken);
3496           /* Cost model check occurs at epilogue generation.  */
3497           else
3498             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3499         }
3500     }
3501
3502   /* Complete the target-specific cost calculations.  */
3503   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3504                &vec_inside_cost, &vec_epilogue_cost);
3505
3506   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3507
3508   if (dump_enabled_p ())
3509     {
3510       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3511       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3512                    vec_inside_cost);
3513       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3514                    vec_prologue_cost);
3515       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3516                    vec_epilogue_cost);
3517       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3518                    scalar_single_iter_cost);
3519       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3520                    scalar_outside_cost);
3521       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3522                    vec_outside_cost);
3523       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3524                    peel_iters_prologue);
3525       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3526                    peel_iters_epilogue);
3527     }
3528
3529   /* Calculate number of iterations required to make the vector version
3530      profitable, relative to the loop bodies only.  The following condition
3531      must hold true:
3532      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3533      where
3534      SIC = scalar iteration cost, VIC = vector iteration cost,
3535      VOC = vector outside cost, VF = vectorization factor,
3536      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3537      SOC = scalar outside cost for run time cost model check.  */
3538
3539   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3540     {
3541       if (vec_outside_cost <= 0)
3542         min_profitable_iters = 1;
3543       else
3544         {
3545           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3546                                   - vec_inside_cost * peel_iters_prologue
3547                                   - vec_inside_cost * peel_iters_epilogue)
3548                                  / ((scalar_single_iter_cost * vf)
3549                                     - vec_inside_cost);
3550
3551           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3552               <= (((int) vec_inside_cost * min_profitable_iters)
3553                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3554             min_profitable_iters++;
3555         }
3556     }
3557   /* vector version will never be profitable.  */
3558   else
3559     {
3560       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3561         warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3562                     "did not happen for a simd loop");
3563
3564       if (dump_enabled_p ())
3565         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3566                          "cost model: the vector iteration cost = %d "
3567                          "divided by the scalar iteration cost = %d "
3568                          "is greater or equal to the vectorization factor = %d"
3569                          ".\n",
3570                          vec_inside_cost, scalar_single_iter_cost, vf);
3571       *ret_min_profitable_niters = -1;
3572       *ret_min_profitable_estimate = -1;
3573       return;
3574     }
3575
3576   dump_printf (MSG_NOTE,
3577                "  Calculated minimum iters for profitability: %d\n",
3578                min_profitable_iters);
3579
3580   min_profitable_iters =
3581         min_profitable_iters < vf ? vf : min_profitable_iters;
3582
3583   /* Because the condition we create is:
3584      if (niters <= min_profitable_iters)
3585        then skip the vectorized loop.  */
3586   min_profitable_iters--;
3587
3588   if (dump_enabled_p ())
3589     dump_printf_loc (MSG_NOTE, vect_location,
3590                      "  Runtime profitability threshold = %d\n",
3591                      min_profitable_iters);
3592
3593   *ret_min_profitable_niters = min_profitable_iters;
3594
3595   /* Calculate number of iterations required to make the vector version
3596      profitable, relative to the loop bodies only.
3597
3598      Non-vectorized variant is SIC * niters and it must win over vector
3599      variant on the expected loop trip count.  The following condition must hold true:
3600      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3601
3602   if (vec_outside_cost <= 0)
3603     min_profitable_estimate = 1;
3604   else
3605     {
3606       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3607                                  - vec_inside_cost * peel_iters_prologue
3608                                  - vec_inside_cost * peel_iters_epilogue)
3609                                  / ((scalar_single_iter_cost * vf)
3610                                    - vec_inside_cost);
3611     }
3612   min_profitable_estimate --;
3613   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3614   if (dump_enabled_p ())
3615     dump_printf_loc (MSG_NOTE, vect_location,
3616                      "  Static estimate profitability threshold = %d\n",
3617                      min_profitable_estimate);
3618
3619   *ret_min_profitable_estimate = min_profitable_estimate;
3620 }
3621
3622 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3623    vector elements (not bits) for a vector of mode MODE.  */
3624 static void
3625 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3626                               unsigned char *sel)
3627 {
3628   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3629
3630   for (i = 0; i < nelt; i++)
3631     sel[i] = (i + offset) & (2*nelt - 1);
3632 }
3633
3634 /* Checks whether the target supports whole-vector shifts for vectors of mode
3635    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3636    it supports vec_perm_const with masks for all necessary shift amounts.  */
3637 static bool
3638 have_whole_vector_shift (enum machine_mode mode)
3639 {
3640   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3641     return true;
3642
3643   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3644     return false;
3645
3646   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3647   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3648
3649   for (i = nelt/2; i >= 1; i/=2)
3650     {
3651       calc_vec_perm_mask_for_shift (mode, i, sel);
3652       if (!can_vec_perm_p (mode, false, sel))
3653         return false;
3654     }
3655   return true;
3656 }
3657
3658 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3659
3660 static tree
3661 get_reduction_op (gimple *stmt, int reduc_index)
3662 {
3663   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3664     {
3665     case GIMPLE_SINGLE_RHS:
3666       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3667                   == ternary_op);
3668       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3669     case GIMPLE_UNARY_RHS:
3670       return gimple_assign_rhs1 (stmt);
3671     case GIMPLE_BINARY_RHS:
3672       return (reduc_index
3673               ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3674     case GIMPLE_TERNARY_RHS:
3675       return gimple_op (stmt, reduc_index + 1);
3676     default:
3677       gcc_unreachable ();
3678     }
3679 }
3680
3681 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3682    functions. Design better to avoid maintenance issues.  */
3683
3684 /* Function vect_model_reduction_cost.
3685
3686    Models cost for a reduction operation, including the vector ops
3687    generated within the strip-mine loop, the initial definition before
3688    the loop, and the epilogue code that must be generated.  */
3689
3690 static bool
3691 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3692                            int ncopies, int reduc_index)
3693 {
3694   int prologue_cost = 0, epilogue_cost = 0;
3695   enum tree_code code;
3696   optab optab;
3697   tree vectype;
3698   gimple *stmt, *orig_stmt;
3699   tree reduction_op;
3700   machine_mode mode;
3701   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3702   struct loop *loop = NULL;
3703   void *target_cost_data;
3704
3705   if (loop_vinfo)
3706     {
3707       loop = LOOP_VINFO_LOOP (loop_vinfo);
3708       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3709     }
3710   else
3711     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3712
3713   /* Condition reductions generate two reductions in the loop.  */
3714   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3715     ncopies *= 2;
3716
3717   /* Cost of reduction op inside loop.  */
3718   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3719                                         stmt_info, 0, vect_body);
3720   stmt = STMT_VINFO_STMT (stmt_info);
3721
3722   reduction_op = get_reduction_op (stmt, reduc_index);
3723
3724   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3725   if (!vectype)
3726     {
3727       if (dump_enabled_p ())
3728         {
3729           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3730                            "unsupported data-type ");
3731           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3732                              TREE_TYPE (reduction_op));
3733           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3734         }
3735       return false;
3736    }
3737
3738   mode = TYPE_MODE (vectype);
3739   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3740
3741   if (!orig_stmt)
3742     orig_stmt = STMT_VINFO_STMT (stmt_info);
3743
3744   code = gimple_assign_rhs_code (orig_stmt);
3745
3746   /* Add in cost for initial definition.
3747      For cond reduction we have four vectors: initial index, step, initial
3748      result of the data reduction, initial value of the index reduction.  */
3749   int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
3750                        == COND_REDUCTION ? 4 : 1;
3751   prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
3752                                   scalar_to_vec, stmt_info, 0,
3753                                   vect_prologue);
3754
3755   /* Determine cost of epilogue code.
3756
3757      We have a reduction operator that will reduce the vector in one statement.
3758      Also requires scalar extract.  */
3759
3760   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3761     {
3762       if (reduc_code != ERROR_MARK)
3763         {
3764           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
3765             {
3766               /* An EQ stmt and an COND_EXPR stmt.  */
3767               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3768                                               vector_stmt, stmt_info, 0,
3769                                               vect_epilogue);
3770               /* Reduction of the max index and a reduction of the found
3771                  values.  */
3772               epilogue_cost += add_stmt_cost (target_cost_data, 2,
3773                                               vec_to_scalar, stmt_info, 0,
3774                                               vect_epilogue);
3775               /* A broadcast of the max value.  */
3776               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3777                                               scalar_to_vec, stmt_info, 0,
3778                                               vect_epilogue);
3779             }
3780           else
3781             {
3782               epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3783                                               stmt_info, 0, vect_epilogue);
3784               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3785                                               vec_to_scalar, stmt_info, 0,
3786                                               vect_epilogue);
3787             }
3788         }
3789       else
3790         {
3791           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3792           tree bitsize =
3793             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3794           int element_bitsize = tree_to_uhwi (bitsize);
3795           int nelements = vec_size_in_bits / element_bitsize;
3796
3797           optab = optab_for_tree_code (code, vectype, optab_default);
3798
3799           /* We have a whole vector shift available.  */
3800           if (VECTOR_MODE_P (mode)
3801               && optab_handler (optab, mode) != CODE_FOR_nothing
3802               && have_whole_vector_shift (mode))
3803             {
3804               /* Final reduction via vector shifts and the reduction operator.
3805                  Also requires scalar extract.  */
3806               epilogue_cost += add_stmt_cost (target_cost_data,
3807                                               exact_log2 (nelements) * 2,
3808                                               vector_stmt, stmt_info, 0,
3809                                               vect_epilogue);
3810               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3811                                               vec_to_scalar, stmt_info, 0,
3812                                               vect_epilogue);
3813             }
3814           else
3815             /* Use extracts and reduction op for final reduction.  For N
3816                elements, we have N extracts and N-1 reduction ops.  */
3817             epilogue_cost += add_stmt_cost (target_cost_data,
3818                                             nelements + nelements - 1,
3819                                             vector_stmt, stmt_info, 0,
3820                                             vect_epilogue);
3821         }
3822     }
3823
3824   if (dump_enabled_p ())
3825     dump_printf (MSG_NOTE,
3826                  "vect_model_reduction_cost: inside_cost = %d, "
3827                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3828                  prologue_cost, epilogue_cost);
3829
3830   return true;
3831 }
3832
3833
3834 /* Function vect_model_induction_cost.
3835
3836    Models cost for induction operations.  */
3837
3838 static void
3839 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3840 {
3841   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3842   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3843   unsigned inside_cost, prologue_cost;
3844
3845   if (PURE_SLP_STMT (stmt_info))
3846     return;
3847
3848   /* loop cost for vec_loop.  */
3849   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3850                                stmt_info, 0, vect_body);
3851
3852   /* prologue cost for vec_init and vec_step.  */
3853   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3854                                  stmt_info, 0, vect_prologue);
3855
3856   if (dump_enabled_p ())
3857     dump_printf_loc (MSG_NOTE, vect_location,
3858                      "vect_model_induction_cost: inside_cost = %d, "
3859                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3860 }
3861
3862
3863
3864 /* Function get_initial_def_for_reduction
3865
3866    Input:
3867    STMT - a stmt that performs a reduction operation in the loop.
3868    INIT_VAL - the initial value of the reduction variable.
3869
3870    Output:
3871    ADJUSTMENT_DEF - if not NULL, receives a tree that holds a value by which
3872         the final result of the reduction must be adjusted in the epilog (see
             below), or NULL when no adjustment is needed (min/max/cond_expr, or
             STMT is in a loop nested in the loop being vectorized).
3873    Return a vector variable, initialized according to the operation that STMT
3874         performs. This vector will be used as the initial value of the
3875         vector of partial results.
3876
3877    Option1 (adjust in epilog): Initialize the vector as follows:
3878      add/sub/bit or/xor: [0,0,...,0,0]
3879      mult / bit and:     [1,1,...,1,1] / [-1,-1,...,-1,-1]
3880      min/max/cond_expr:  [init_val,init_val,..,init_val,init_val]
3881    and when necessary (e.g. add/mult case) let the caller know
3882    that it needs to adjust the result by init_val.
3883
3884    Option2: Initialize the vector as follows:
3885      add/sub/bit or/xor: [init_val,0,0,...,0]
3886      mult / bit and:     [init_val,1,1,...,1] / [init_val,-1,-1,...,-1]
3887      min/max/cond_expr:  [init_val,init_val,...,init_val]
3888    and no adjustments are needed.
3889
3890    For example, for the following code:
3891
3892    s = init_val;
3893    for (i=0;i<n;i++)
3894      s = s + a[i];
3895
3896    STMT is 's = s + a[i]', and the reduction variable is 's'.
3897    For a vector of 4 units, we want to return either [init_val,0,0,0],
3898    or [0,0,0,0] and let the caller know that it needs to adjust
3899    the result at the end by 'init_val'.
3900
3901    FORNOW, if ADJUSTMENT_DEF is not NULL we use the 'adjust in epilog' scheme
3902    (Option1), because this way the initialization vector is simpler (same
3903    element in all entries); otherwise we use Option2.
3904
3905    A cost model should help decide between these two schemes.  */
3906
3907 tree
3908 get_initial_def_for_reduction (gimple *stmt, tree init_val,
3909                                tree *adjustment_def)
3910 {
3911   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3912   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3913   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3914   tree scalar_type = TREE_TYPE (init_val);
3915   tree vectype = get_vectype_for_scalar_type (scalar_type);
3916   int nunits;
3917   enum tree_code code = gimple_assign_rhs_code (stmt);
3918   tree def_for_init;
3919   tree init_def;
3920   tree *elts;
3921   int i;
3922   bool nested_in_vect_loop = false;
       /* Neutral element, zero by default; overridden below for MULT_EXPR and
          BIT_AND_EXPR.  */
3923   REAL_VALUE_TYPE real_init_val = dconst0;
3924   int int_init_val = 0;
3925   gimple *def_stmt = NULL;
3926   gimple_seq stmts = NULL;
3927
       /* The vector type is derived from the type of INIT_VAL and must exist.  */
3928   gcc_assert (vectype);
3929   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3930
3931   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3932               || SCALAR_FLOAT_TYPE_P (scalar_type));
3933
       /* STMT is either in a loop nested in LOOP, or its basic block must
          belong directly to LOOP.  */
3934   if (nested_in_vect_loop_p (loop, stmt))
3935     nested_in_vect_loop = true;
3936   else
3937     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3938
3939   /* In case of double reduction we only create a vector variable to be put
3940      in the reduction phi node.  The actual statement creation is done in
3941      vect_create_epilog_for_reduction.  This applies when INIT_VAL is an
          SSA name defined by a phi inside LOOP whose def type is
          vect_double_reduction_def.  */
3942   if (adjustment_def && nested_in_vect_loop
3943       && TREE_CODE (init_val) == SSA_NAME
3944       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3945       && gimple_code (def_stmt) == GIMPLE_PHI
3946       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3947       && vinfo_for_stmt (def_stmt)
3948       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3949           == vect_double_reduction_def)
3950     {
3951       *adjustment_def = NULL;
3952       return vect_create_destination_var (init_val, vectype);
3953     }
3954
3955   /* In case of a nested reduction do not use an adjustment def as
3956      that case is not supported by the epilogue generation correctly
3957      if ncopies is not one.  */
3958   if (adjustment_def && nested_in_vect_loop)
3959     {
3960       *adjustment_def = NULL;
3961       return vect_get_vec_def_for_operand (init_val, stmt);
3962     }
3963
3964   switch (code)
3965     {
3966       case WIDEN_SUM_EXPR:
3967       case DOT_PROD_EXPR:
3968       case SAD_EXPR:
3969       case PLUS_EXPR:
3970       case MINUS_EXPR:
3971       case BIT_IOR_EXPR:
3972       case BIT_XOR_EXPR:
3973       case MULT_EXPR:
3974       case BIT_AND_EXPR:
3975         /* ADJUSTMENT_DEF is NULL when called from
3976            vect_create_epilog_for_reduction to vectorize double reduction.  */
3977         if (adjustment_def)
3978           *adjustment_def = init_val;
3979
             /* Pick the neutral element: 1 for multiplication, -1 for bitwise
                and, and the default 0 for the remaining codes.  */
3980         if (code == MULT_EXPR)
3981           {
3982             real_init_val = dconst1;
3983             int_init_val = 1;
3984           }
3985
3986         if (code == BIT_AND_EXPR)
3987           int_init_val = -1;
3988
3989         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3990           def_for_init = build_real (scalar_type, real_init_val);
3991         else
3992           def_for_init = build_int_cst (scalar_type, int_init_val);
3993
3994         /* Fill every element but the first with the neutral value.  */
3995         elts = XALLOCAVEC (tree, nunits);
3996         for (i = nunits - 2; i >= 0; --i)
3997           elts[i + 1] = def_for_init;
3998
3999         /* Option1: the first element is the neutral value as well; the
                caller adjusts the result by *ADJUSTMENT_DEF (INIT_VAL).  */
4000         if (adjustment_def)
4001           {
4002             elts[0] = def_for_init;
4003             init_def = build_vector (vectype, elts);
4004             break;
4005           }
4006
4007         /* Option2: the first element is INIT_VAL.  A constant INIT_VAL is
                handled by build_vector; otherwise a constructor is built.  */
4008         elts[0] = init_val;
4009         if (TREE_CONSTANT (init_val))
4010           init_def = build_vector (vectype, elts);
4011         else
4012           {
4013             vec<constructor_elt, va_gc> *v;
4014             vec_alloc (v, nunits);
4015             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
4016             for (i = 1; i < nunits; ++i)
4017               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
4018             init_def = build_constructor (vectype, v);
4019           }
4020
4021         break;
4022
4023       case MIN_EXPR:
4024       case MAX_EXPR:
4025       case COND_EXPR:
             /* These codes never need an epilog adjustment.  With
                ADJUSTMENT_DEF given and no COND_REDUCTION, the vector def of
                INIT_VAL is obtained via vect_get_vec_def_for_operand.  */
4026         if (adjustment_def)
4027           {
4028             *adjustment_def = NULL_TREE;
4029             if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
4030               {
4031                 init_def = vect_get_vec_def_for_operand (init_val, stmt);
4032                 break;
4033               }
4034           }
             /* Reached when ADJUSTMENT_DEF is NULL or for COND_REDUCTION:
                convert INIT_VAL to the element type of VECTYPE, emit any
                conversion statements on the preheader edge of LOOP, and build
                the vector from the converted value.  */
4035         init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4036         if (! gimple_seq_empty_p (stmts))
4037           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4038         init_def = build_vector_from_val (vectype, init_val);
4039         break;
4040
4041       default:
4042         gcc_unreachable ();
4043     }
4044
4045   return init_def;
4046 }
4047
4048 /* Function vect_create_epilog_for_reduction
4049
4050    Create code at the loop-epilog to finalize the result of a reduction
4051    computation.
4052
4053    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of the
4054      vector reduction statements.
4055    STMT is the scalar reduction stmt that is being vectorized.
4056    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4057      number of elements that we can fit in a vectype (nunits).  In this case
4058      we have to generate more than one vector stmt - i.e - we need to "unroll"
4059      the vector stmt by a factor VF/nunits.  For more details see documentation
4060      in vectorizable_operation.
4061    REDUC_CODE is the tree-code for the epilog reduction.
4062    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4063      computation.
4064    REDUC_INDEX is the index of the operand in the right hand side of the
4065      statement that is defined by REDUCTION_PHI.
4066    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4067    SLP_NODE is an SLP node containing a group of reduction statements. The
4068      first one in this group is STMT.
4069    INDUCTION_INDEX is, for condition reductions, the vector of loop indexes
4070      at which the condition matched.  Otherwise it is undefined.
4071
4072    This function:
4073    1. Creates the reduction def-use cycles: sets the arguments for
4074       REDUCTION_PHIS:
4075       The loop-entry argument is the vectorized initial-value of the reduction.
4076       The loop-latch argument is taken from VECT_DEFS - the vector of partial
4077       sums.
4078    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4079       by applying the operation specified by REDUC_CODE if available, or by
4080       other means (whole-vector shifts or a scalar loop).
4081       The function also creates a new phi node at the loop exit to preserve
4082       loop-closed form, as illustrated below.
4083
4084      The flow at the entry to this function:
4085
4086         loop:
4087           vec_def = phi <null, null>            # REDUCTION_PHI
4088           VECT_DEF = vector_stmt                # vectorized form of STMT
4089           s_loop = scalar_stmt                  # (scalar) STMT
4090         loop_exit:
4091           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4092           use <s_out0>
4093           use <s_out0>
4094
4095      The above is transformed by this function into:
4096
4097         loop:
4098           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4099           VECT_DEF = vector_stmt                # vectorized form of STMT
4100           s_loop = scalar_stmt                  # (scalar) STMT
4101         loop_exit:
4102           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4103           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4104           v_out2 = reduce <v_out1>
4105           s_out3 = extract_field <v_out2, 0>
4106           s_out4 = adjust_result <s_out3>
4107           use <s_out4>
4108           use <s_out4>
4109 */
4110
4111 static void
4112 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4113                                   int ncopies, enum tree_code reduc_code,
4114                                   vec<gimple *> reduction_phis,
4115                                   int reduc_index, bool double_reduc,
4116                                   slp_tree slp_node, tree induction_index)
4117 {
4118   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4119   stmt_vec_info prev_phi_info;
4120   tree vectype;
4121   machine_mode mode;
4122   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4123   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4124   basic_block exit_bb;
4125   tree scalar_dest;
4126   tree scalar_type;
4127   gimple *new_phi = NULL, *phi;
4128   gimple_stmt_iterator exit_gsi;
4129   tree vec_dest;
4130   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4131   gimple *epilog_stmt = NULL;
4132   enum tree_code code = gimple_assign_rhs_code (stmt);
4133   gimple *exit_phi;
4134   tree bitsize;
4135   tree adjustment_def = NULL;
4136   tree vec_initial_def = NULL;
4137   tree reduction_op, expr, def, initial_def = NULL;
4138   tree orig_name, scalar_result;
4139   imm_use_iterator imm_iter, phi_imm_iter;
4140   use_operand_p use_p, phi_use_p;
4141   gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4142   bool nested_in_vect_loop = false;
4143   auto_vec<gimple *> new_phis;
4144   auto_vec<gimple *> inner_phis;
4145   enum vect_def_type dt = vect_unknown_def_type;
4146   int j, i;
4147   auto_vec<tree> scalar_results;
4148   unsigned int group_size = 1, k, ratio;
4149   auto_vec<tree> vec_initial_defs;
4150   auto_vec<gimple *> phis;
4151   bool slp_reduc = false;
4152   tree new_phi_result;
4153   gimple *inner_phi = NULL;
4154
4155   if (slp_node)
4156     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4157
4158   if (nested_in_vect_loop_p (loop, stmt))
4159     {
4160       outer_loop = loop;
4161       loop = loop->inner;
4162       nested_in_vect_loop = true;
4163       gcc_assert (!slp_node);
4164     }
4165
4166   reduction_op = get_reduction_op (stmt, reduc_index);
4167
4168   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4169   gcc_assert (vectype);
4170   mode = TYPE_MODE (vectype);
4171
4172   /* 1. Create the reduction def-use cycle:
4173      Set the arguments of REDUCTION_PHIS, i.e., transform
4174
4175         loop:
4176           vec_def = phi <null, null>            # REDUCTION_PHI
4177           VECT_DEF = vector_stmt                # vectorized form of STMT
4178           ...
4179
4180      into:
4181
4182         loop:
4183           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4184           VECT_DEF = vector_stmt                # vectorized form of STMT
4185           ...
4186
4187      (in case of SLP, do it for all the phis). */
4188
4189   /* Get the loop-entry arguments.  */
4190   enum vect_def_type initial_def_dt = vect_unknown_def_type;
4191   if (slp_node)
4192     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4193                        NULL, slp_node, reduc_index);
4194   else
4195     {
4196       /* Get at the scalar def before the loop, that defines the initial value
4197          of the reduction variable.  */
4198       gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
4199       initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
4200                                            loop_preheader_edge (loop));
4201       vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
4202       vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
4203                                                        &adjustment_def);
4204       vec_initial_defs.create (1);
4205       vec_initial_defs.quick_push (vec_initial_def);
4206     }
4207
4208   /* Set phi nodes arguments.  */
4209   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4210     {
4211       tree vec_init_def, def;
4212       gimple_seq stmts;
4213       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4214                                            true, NULL_TREE);
4215       if (stmts)
4216         gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4217
4218       def = vect_defs[i];
4219       for (j = 0; j < ncopies; j++)
4220         {
4221           if (j != 0)
4222             {
4223               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4224               if (nested_in_vect_loop)
4225                 vec_init_def
4226                   = vect_get_vec_def_for_stmt_copy (initial_def_dt,
4227                                                     vec_init_def);
4228             }
4229
4230           /* Set the loop-entry arg of the reduction-phi.  */
4231
4232           if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4233               == INTEGER_INDUC_COND_REDUCTION)
4234             {
4235               /* Initialise the reduction phi to zero.  This prevents initial
4236                  values of non-zero interferring with the reduction op.  */
4237               gcc_assert (ncopies == 1);
4238               gcc_assert (i == 0);
4239
4240               tree vec_init_def_type = TREE_TYPE (vec_init_def);
4241               tree zero_vec = build_zero_cst (vec_init_def_type);
4242
4243               add_phi_arg (as_a <gphi *> (phi), zero_vec,
4244                            loop_preheader_edge (loop), UNKNOWN_LOCATION);
4245             }
4246           else
4247             add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4248                          loop_preheader_edge (loop), UNKNOWN_LOCATION);
4249
4250           /* Set the loop-latch arg for the reduction-phi.  */
4251           if (j > 0)
4252             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4253
4254           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4255                        UNKNOWN_LOCATION);
4256
4257           if (dump_enabled_p ())
4258             {
4259               dump_printf_loc (MSG_NOTE, vect_location,
4260                                "transform reduction: created def-use cycle: ");
4261               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4262               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4263             }
4264         }
4265     }
4266
4267   /* 2. Create epilog code.
4268         The reduction epilog code operates across the elements of the vector
4269         of partial results computed by the vectorized loop.
4270         The reduction epilog code consists of:
4271
4272         step 1: compute the scalar result in a vector (v_out2)
4273         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4274         step 3: adjust the scalar result (s_out3) if needed.
4275
4276         Step 1 can be accomplished using one the following three schemes:
4277           (scheme 1) using reduc_code, if available.
4278           (scheme 2) using whole-vector shifts, if available.
4279           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4280                      combined.
4281
4282           The overall epilog code looks like this:
4283
4284           s_out0 = phi <s_loop>         # original EXIT_PHI
4285           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4286           v_out2 = reduce <v_out1>              # step 1
4287           s_out3 = extract_field <v_out2, 0>    # step 2
4288           s_out4 = adjust_result <s_out3>       # step 3
4289
4290           (step 3 is optional, and steps 1 and 2 may be combined).
4291           Lastly, the uses of s_out0 are replaced by s_out4.  */
4292
4293
4294   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4295          v_out1 = phi <VECT_DEF>
4296          Store them in NEW_PHIS.  */
4297
4298   exit_bb = single_exit (loop)->dest;
4299   prev_phi_info = NULL;
4300   new_phis.create (vect_defs.length ());
4301   FOR_EACH_VEC_ELT (vect_defs, i, def)
4302     {
4303       for (j = 0; j < ncopies; j++)
4304         {
4305           tree new_def = copy_ssa_name (def);
4306           phi = create_phi_node (new_def, exit_bb);
4307           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
4308           if (j == 0)
4309             new_phis.quick_push (phi);
4310           else
4311             {
4312               def = vect_get_vec_def_for_stmt_copy (dt, def);
4313               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4314             }
4315
4316           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4317           prev_phi_info = vinfo_for_stmt (phi);
4318         }
4319     }
4320
4321   /* The epilogue is created for the outer-loop, i.e., for the loop being
4322      vectorized.  Create exit phis for the outer loop.  */
4323   if (double_reduc)
4324     {
4325       loop = outer_loop;
4326       exit_bb = single_exit (loop)->dest;
4327       inner_phis.create (vect_defs.length ());
4328       FOR_EACH_VEC_ELT (new_phis, i, phi)
4329         {
4330           tree new_result = copy_ssa_name (PHI_RESULT (phi));
4331           gphi *outer_phi = create_phi_node (new_result, exit_bb);
4332           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4333                            PHI_RESULT (phi));
4334           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4335                                                             loop_vinfo));
4336           inner_phis.quick_push (phi);
4337           new_phis[i] = outer_phi;
4338           prev_phi_info = vinfo_for_stmt (outer_phi);
4339           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4340             {
4341               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4342               new_result = copy_ssa_name (PHI_RESULT (phi));
4343               outer_phi = create_phi_node (new_result, exit_bb);
4344               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4345                                PHI_RESULT (phi));
4346               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4347                                                                 loop_vinfo));
4348               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4349               prev_phi_info = vinfo_for_stmt (outer_phi);
4350             }
4351         }
4352     }
4353
4354   exit_gsi = gsi_after_labels (exit_bb);
4355
4356   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4357          (i.e. when reduc_code is not available) and in the final adjustment
4358          code (if needed).  Also get the original scalar reduction variable as
4359          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4360          represents a reduction pattern), the tree-code and scalar-def are
4361          taken from the original stmt that the pattern-stmt (STMT) replaces.
4362          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4363          are taken from STMT.  */
4364
4365   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4366   if (!orig_stmt)
4367     {
4368       /* Regular reduction  */
4369       orig_stmt = stmt;
4370     }
4371   else
4372     {
4373       /* Reduction pattern  */
4374       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4375       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4376       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4377     }
4378
4379   code = gimple_assign_rhs_code (orig_stmt);
4380   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4381      partial results are added and not subtracted.  */
4382   if (code == MINUS_EXPR)
4383     code = PLUS_EXPR;
4384
4385   scalar_dest = gimple_assign_lhs (orig_stmt);
4386   scalar_type = TREE_TYPE (scalar_dest);
4387   scalar_results.create (group_size);
4388   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4389   bitsize = TYPE_SIZE (scalar_type);
4390
4391   /* In case this is a reduction in an inner-loop while vectorizing an outer
4392      loop - we don't need to extract a single scalar result at the end of the
4393      inner-loop (unless it is double reduction, i.e., the use of reduction is
4394      outside the outer-loop).  The final vector of partial results will be used
4395      in the vectorized outer-loop, or reduced to a scalar result at the end of
4396      the outer-loop.  */
4397   if (nested_in_vect_loop && !double_reduc)
4398     goto vect_finalize_reduction;
4399
4400   /* SLP reduction without reduction chain, e.g.,
4401      # a1 = phi <a2, a0>
4402      # b1 = phi <b2, b0>
4403      a2 = operation (a1)
4404      b2 = operation (b1)  */
4405   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4406
4407   /* In case of reduction chain, e.g.,
4408      # a1 = phi <a3, a0>
4409      a2 = operation (a1)
4410      a3 = operation (a2),
4411
4412      we may end up with more than one vector result.  Here we reduce them to
4413      one vector.  */
4414   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4415     {
4416       tree first_vect = PHI_RESULT (new_phis[0]);
4417       tree tmp;
4418       gassign *new_vec_stmt = NULL;
4419
4420       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4421       for (k = 1; k < new_phis.length (); k++)
4422         {
4423           gimple *next_phi = new_phis[k];
4424           tree second_vect = PHI_RESULT (next_phi);
4425
4426           tmp = build2 (code, vectype,  first_vect, second_vect);
4427           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4428           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4429           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4430           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4431         }
4432
4433       new_phi_result = first_vect;
4434       if (new_vec_stmt)
4435         {
4436           new_phis.truncate (0);
4437           new_phis.safe_push (new_vec_stmt);
4438         }
4439     }
4440   else
4441     new_phi_result = PHI_RESULT (new_phis[0]);
4442
4443   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4444     {
4445       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4446          various data values where the condition matched and another vector
4447          (INDUCTION_INDEX) containing all the indexes of those matches.  We
4448          need to extract the last matching index (which will be the index with
4449          highest value) and use this to index into the data vector.
4450          For the case where there were no matches, the data vector will contain
4451          all default values and the index vector will be all zeros.  */
4452
4453       /* Get various versions of the type of the vector of indexes.  */
4454       tree index_vec_type = TREE_TYPE (induction_index);
4455       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
4456       tree index_scalar_type = TREE_TYPE (index_vec_type);
4457       tree index_vec_cmp_type = build_same_sized_truth_vector_type
4458         (index_vec_type);
4459
4460       /* Get an unsigned integer version of the type of the data vector.  */
4461       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
4462       tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
4463       tree vectype_unsigned = build_vector_type
4464         (scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
4465
4466       /* First we need to create a vector (ZERO_VEC) of zeros and another
4467          vector (MAX_INDEX_VEC) filled with the last matching index, which we
4468          can create using a MAX reduction and then expanding.
4469          In the case where the loop never made any matches, the max index will
4470          be zero.  */
4471
4472       /* Vector of {0, 0, 0,...}.  */
4473       tree zero_vec = make_ssa_name (vectype);
4474       tree zero_vec_rhs = build_zero_cst (vectype);
4475       gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4476       gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4477
4478       /* Find maximum value from the vector of found indexes.  */
4479       tree max_index = make_ssa_name (index_scalar_type);
4480       gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
4481                                                     induction_index);
4482       gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4483
4484       /* Vector of {max_index, max_index, max_index,...}.  */
4485       tree max_index_vec = make_ssa_name (index_vec_type);
4486       tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4487                                                       max_index);
4488       gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
4489                                                         max_index_vec_rhs);
4490       gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
4491
4492       /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
4493          with the vector (INDUCTION_INDEX) of found indexes, choosing values
4494          from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
4495          otherwise.  Only one value should match, resulting in a vector
4496          (VEC_COND) with one data value and the rest zeros.
4497          In the case where the loop never made any matches, every index will
4498          match, resulting in a vector with all data values (which will all be
4499          the default value).  */
4500
4501       /* Compare the max index vector to the vector of found indexes to find
4502          the position of the max value.  */
4503       tree vec_compare = make_ssa_name (index_vec_cmp_type);
4504       gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
4505                                                       induction_index,
4506                                                       max_index_vec);
4507       gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
4508
4509       /* Use the compare to choose either values from the data vector or
4510          zero.  */
4511       tree vec_cond = make_ssa_name (vectype);
4512       gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
4513                                                    vec_compare, new_phi_result,
4514                                                    zero_vec);
4515       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
4516
4517       /* Finally we need to extract the data value from the vector (VEC_COND)
4518          into a scalar (MATCHED_DATA_REDUC).  Logically we want to do a OR
4519          reduction, but because this doesn't exist, we can use a MAX reduction
4520          instead.  The data value might be signed or a float so we need to cast
4521          it first.
4522          In the case where the loop never made any matches, the data values are
4523          all identical, and so will reduce down correctly.  */
4524
4525       /* Make the matched data values unsigned.  */
4526       tree vec_cond_cast = make_ssa_name (vectype_unsigned);
4527       tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
4528                                        vec_cond);
4529       gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
4530                                                         VIEW_CONVERT_EXPR,
4531                                                         vec_cond_cast_rhs);
4532       gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4533
4534       /* Reduce down to a scalar value.  */
4535       tree data_reduc = make_ssa_name (scalar_type_unsigned);
4536       optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
4537                                       optab_default);
4538       gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
4539                   != CODE_FOR_nothing);
4540       gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4541                                                      REDUC_MAX_EXPR,
4542                                                      vec_cond_cast);
4543       gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4544
4545       /* Convert the reduced value back to the result type and set as the
4546          result.  */
4547       tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
4548                                      data_reduc);
4549       epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
4550       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4551       gimple_assign_set_lhs (epilog_stmt, new_temp);
4552       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4553       scalar_results.safe_push (new_temp);
4554     }
4555
4556   /* 2.3 Create the reduction code, using one of the three schemes described
4557          above. In SLP we simply need to extract all the elements from the
4558          vector (without reducing them), so we use scalar shifts.  */
4559   else if (reduc_code != ERROR_MARK && !slp_reduc)
4560     {
4561       tree tmp;
4562       tree vec_elem_type;
4563
4564       /* Case 1:  Create:
4565          v_out2 = reduc_expr <v_out1>  */
4566
4567       if (dump_enabled_p ())
4568         dump_printf_loc (MSG_NOTE, vect_location,
4569                          "Reduce using direct vector reduction.\n");
4570
4571       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4572       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4573         {
4574           tree tmp_dest =
4575               vect_create_destination_var (scalar_dest, vec_elem_type);
4576           tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4577           epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4578           new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4579           gimple_assign_set_lhs (epilog_stmt, new_temp);
4580           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4581
4582           tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4583         }
4584       else
4585         tmp = build1 (reduc_code, scalar_type, new_phi_result);
4586
4587       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4588       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4589       gimple_assign_set_lhs (epilog_stmt, new_temp);
4590       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4591
4592       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4593           == INTEGER_INDUC_COND_REDUCTION)
4594         {
4595           /* Earlier we set the initial value to be zero.  Check the result
4596              and if it is zero then replace with the original initial
4597              value.  */
4598           tree zero = build_zero_cst (scalar_type);
4599           tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero);
4600
4601           tmp = make_ssa_name (new_scalar_dest);
4602           epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4603                                              initial_def, new_temp);
4604           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4605           new_temp = tmp;
4606         }
4607
4608       scalar_results.safe_push (new_temp);
4609     }
4610   else
4611     {
4612       bool reduce_with_shift = have_whole_vector_shift (mode);
4613       int element_bitsize = tree_to_uhwi (bitsize);
4614       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4615       tree vec_temp;
4616
4617       /* Regardless of whether we have a whole vector shift, if we're
4618          emulating the operation via tree-vect-generic, we don't want
4619          to use it.  Only the first round of the reduction is likely
4620          to still be profitable via emulation.  */
4621       /* ??? It might be better to emit a reduction tree code here, so that
4622          tree-vect-generic can expand the first round via bit tricks.  */
4623       if (!VECTOR_MODE_P (mode))
4624         reduce_with_shift = false;
4625       else
4626         {
4627           optab optab = optab_for_tree_code (code, vectype, optab_default);
4628           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4629             reduce_with_shift = false;
4630         }
4631
4632       if (reduce_with_shift && !slp_reduc)
4633         {
4634           int nelements = vec_size_in_bits / element_bitsize;
4635           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4636
4637           int elt_offset;
4638
4639           tree zero_vec = build_zero_cst (vectype);
4640           /* Case 2: Create:
4641              for (offset = nelements/2; offset >= 1; offset/=2)
4642                 {
4643                   Create:  va' = vec_shift <va, offset>
4644                   Create:  va = vop <va, va'>
4645                 }  */
4646
4647           tree rhs;
4648
4649           if (dump_enabled_p ())
4650             dump_printf_loc (MSG_NOTE, vect_location,
4651                              "Reduce using vector shifts\n");
4652
4653           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4654           new_temp = new_phi_result;
4655           for (elt_offset = nelements / 2;
4656                elt_offset >= 1;
4657                elt_offset /= 2)
4658             {
4659               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4660               tree mask = vect_gen_perm_mask_any (vectype, sel);
4661               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4662                                                  new_temp, zero_vec, mask);
4663               new_name = make_ssa_name (vec_dest, epilog_stmt);
4664               gimple_assign_set_lhs (epilog_stmt, new_name);
4665               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4666
4667               epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4668                                                  new_temp);
4669               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4670               gimple_assign_set_lhs (epilog_stmt, new_temp);
4671               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4672             }
4673
4674           /* 2.4  Extract the final scalar result.  Create:
4675              s_out3 = extract_field <v_out2, bitpos>  */
4676
4677           if (dump_enabled_p ())
4678             dump_printf_loc (MSG_NOTE, vect_location,
4679                              "extract scalar result\n");
4680
4681           rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4682                         bitsize, bitsize_zero_node);
4683           epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4684           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4685           gimple_assign_set_lhs (epilog_stmt, new_temp);
4686           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4687           scalar_results.safe_push (new_temp);
4688         }
4689       else
4690         {
4691           /* Case 3: Create:
4692              s = extract_field <v_out2, 0>
4693              for (offset = element_size;
4694                   offset < vector_size;
4695                   offset += element_size;)
4696                {
4697                  Create:  s' = extract_field <v_out2, offset>
4698                  Create:  s = op <s, s'>  // For non SLP cases
4699                }  */
4700
4701           if (dump_enabled_p ())
4702             dump_printf_loc (MSG_NOTE, vect_location,
4703                              "Reduce using scalar code.\n");
4704
4705           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4706           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4707             {
4708               int bit_offset;
4709               if (gimple_code (new_phi) == GIMPLE_PHI)
4710                 vec_temp = PHI_RESULT (new_phi);
4711               else
4712                 vec_temp = gimple_assign_lhs (new_phi);
4713               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4714                             bitsize_zero_node);
4715               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4716               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4717               gimple_assign_set_lhs (epilog_stmt, new_temp);
4718               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4719
4720               /* In SLP we don't need to apply reduction operation, so we just
4721                  collect s' values in SCALAR_RESULTS.  */
4722               if (slp_reduc)
4723                 scalar_results.safe_push (new_temp);
4724
4725               for (bit_offset = element_bitsize;
4726                    bit_offset < vec_size_in_bits;
4727                    bit_offset += element_bitsize)
4728                 {
4729                   tree bitpos = bitsize_int (bit_offset);
4730                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4731                                      bitsize, bitpos);
4732
4733                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4734                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4735                   gimple_assign_set_lhs (epilog_stmt, new_name);
4736                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4737
4738                   if (slp_reduc)
4739                     {
4740                       /* In SLP we don't need to apply reduction operation, so
4741                          we just collect s' values in SCALAR_RESULTS.  */
4742                       new_temp = new_name;
4743                       scalar_results.safe_push (new_name);
4744                     }
4745                   else
4746                     {
4747                       epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4748                                                          new_name, new_temp);
4749                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4750                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4751                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4752                     }
4753                 }
4754             }
4755
4756           /* The only case where we need to reduce scalar results in SLP, is
4757              unrolling.  If the size of SCALAR_RESULTS is greater than
4758              GROUP_SIZE, we reduce them combining elements modulo
4759              GROUP_SIZE.  */
4760           if (slp_reduc)
4761             {
4762               tree res, first_res, new_res;
4763               gimple *new_stmt;
4764
4765               /* Reduce multiple scalar results in case of SLP unrolling.  */
4766               for (j = group_size; scalar_results.iterate (j, &res);
4767                    j++)
4768                 {
4769                   first_res = scalar_results[j % group_size];
4770                   new_stmt = gimple_build_assign (new_scalar_dest, code,
4771                                                   first_res, res);
4772                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4773                   gimple_assign_set_lhs (new_stmt, new_res);
4774                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4775                   scalar_results[j % group_size] = new_res;
4776                 }
4777             }
4778           else
4779             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4780             scalar_results.safe_push (new_temp);
4781         }
4782     }
4783
4784 vect_finalize_reduction:
4785
4786   if (double_reduc)
4787     loop = loop->inner;
4788
4789   /* 2.5 Adjust the final result by the initial value of the reduction
4790          variable. (When such adjustment is not needed, then
4791          'adjustment_def' is zero).  For example, if code is PLUS we create:
4792          new_temp = loop_exit_def + adjustment_def  */
4793
4794   if (adjustment_def)
4795     {
4796       gcc_assert (!slp_reduc);
4797       if (nested_in_vect_loop)
4798         {
4799           new_phi = new_phis[0];
4800           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4801           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4802           new_dest = vect_create_destination_var (scalar_dest, vectype);
4803         }
4804       else
4805         {
4806           new_temp = scalar_results[0];
4807           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4808           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4809           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4810         }
4811
4812       epilog_stmt = gimple_build_assign (new_dest, expr);
4813       new_temp = make_ssa_name (new_dest, epilog_stmt);
4814       gimple_assign_set_lhs (epilog_stmt, new_temp);
4815       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4816       if (nested_in_vect_loop)
4817         {
4818           set_vinfo_for_stmt (epilog_stmt,
4819                               new_stmt_vec_info (epilog_stmt, loop_vinfo));
4820           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4821                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4822
4823           if (!double_reduc)
4824             scalar_results.quick_push (new_temp);
4825           else
4826             scalar_results[0] = new_temp;
4827         }
4828       else
4829         scalar_results[0] = new_temp;
4830
4831       new_phis[0] = epilog_stmt;
4832     }
4833
4834   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4835           phis with new adjusted scalar results, i.e., replace use <s_out0>
4836           with use <s_out4>.
4837
4838      Transform:
4839         loop_exit:
4840           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4841           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4842           v_out2 = reduce <v_out1>
4843           s_out3 = extract_field <v_out2, 0>
4844           s_out4 = adjust_result <s_out3>
4845           use <s_out0>
4846           use <s_out0>
4847
4848      into:
4849
4850         loop_exit:
4851           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4852           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4853           v_out2 = reduce <v_out1>
4854           s_out3 = extract_field <v_out2, 0>
4855           s_out4 = adjust_result <s_out3>
4856           use <s_out4>
4857           use <s_out4> */
4858
4859
4860   /* In SLP reduction chain we reduce vector results into one vector if
4861      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4862      the last stmt in the reduction chain, since we are looking for the loop
4863      exit phi node.  */
4864   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4865     {
4866       gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4867       /* Handle reduction patterns.  */
4868       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4869         dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4870
4871       scalar_dest = gimple_assign_lhs (dest_stmt);
4872       group_size = 1;
4873     }
4874
4875   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4876      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4877      need to match SCALAR_RESULTS with corresponding statements.  The first
4878      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4879      the first vector stmt, etc.
4880      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4881   if (group_size > new_phis.length ())
4882     {
4883       ratio = group_size / new_phis.length ();
4884       gcc_assert (!(group_size % new_phis.length ()));
4885     }
4886   else
4887     ratio = 1;
4888
4889   for (k = 0; k < group_size; k++)
4890     {
4891       if (k % ratio == 0)
4892         {
4893           epilog_stmt = new_phis[k / ratio];
4894           reduction_phi = reduction_phis[k / ratio];
4895           if (double_reduc)
4896             inner_phi = inner_phis[k / ratio];
4897         }
4898
4899       if (slp_reduc)
4900         {
4901           gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4902
4903           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4904           /* SLP statements can't participate in patterns.  */
4905           gcc_assert (!orig_stmt);
4906           scalar_dest = gimple_assign_lhs (current_stmt);
4907         }
4908
4909       phis.create (3);
4910       /* Find the loop-closed-use at the loop exit of the original scalar
4911          result.  (The reduction result is expected to have two immediate uses -
4912          one at the latch block, and one at the loop exit).  */
4913       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4914         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4915             && !is_gimple_debug (USE_STMT (use_p)))
4916           phis.safe_push (USE_STMT (use_p));
4917
4918       /* While we expect to have found an exit_phi because of loop-closed-ssa
4919          form we can end up without one if the scalar cycle is dead.  */
4920
4921       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4922         {
4923           if (outer_loop)
4924             {
4925               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4926               gphi *vect_phi;
4927
4928               /* FORNOW. Currently not supporting the case that an inner-loop
4929                  reduction is not used in the outer-loop (but only outside the
4930                  outer-loop), unless it is double reduction.  */
4931               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4932                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4933                           || double_reduc);
4934
4935               if (double_reduc)
4936                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
4937               else
4938                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4939               if (!double_reduc
4940                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4941                       != vect_double_reduction_def)
4942                 continue;
4943
4944               /* Handle double reduction:
4945
4946                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4947                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4948                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4949                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4950
4951                  At that point the regular reduction (stmt2 and stmt3) is
4952                  already vectorized, as well as the exit phi node, stmt4.
4953                  Here we vectorize the phi node of double reduction, stmt1, and
4954                  update all relevant statements.  */
4955
4956               /* Go through all the uses of s2 to find double reduction phi
4957                  node, i.e., stmt1 above.  */
4958               orig_name = PHI_RESULT (exit_phi);
4959               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4960                 {
4961                   stmt_vec_info use_stmt_vinfo;
4962                   stmt_vec_info new_phi_vinfo;
4963                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4964                   basic_block bb = gimple_bb (use_stmt);
4965                   gimple *use;
4966
4967                   /* Check that USE_STMT is really double reduction phi
4968                      node.  */
4969                   if (gimple_code (use_stmt) != GIMPLE_PHI
4970                       || gimple_phi_num_args (use_stmt) != 2
4971                       || bb->loop_father != outer_loop)
4972                     continue;
4973                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4974                   if (!use_stmt_vinfo
4975                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4976                           != vect_double_reduction_def)
4977                     continue;
4978
4979                   /* Create vector phi node for double reduction:
4980                      vs1 = phi <vs0, vs2>
4981                      vs1 was created previously in this function by a call to
4982                        vect_get_vec_def_for_operand and is stored in
4983                        vec_initial_def;
4984                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4985                      vs0 is created here.  */
4986
4987                   /* Create vector phi node.  */
4988                   vect_phi = create_phi_node (vec_initial_def, bb);
4989                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4990                                     loop_vec_info_for_loop (outer_loop));
4991                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4992
4993                   /* Create vs0 - initial def of the double reduction phi.  */
4994                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4995                                              loop_preheader_edge (outer_loop));
4996                   init_def = get_initial_def_for_reduction (stmt,
4997                                                           preheader_arg, NULL);
4998                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4999                                                     vectype, NULL);
5000
5001                   /* Update phi node arguments with vs0 and vs2.  */
5002                   add_phi_arg (vect_phi, vect_phi_init,
5003                                loop_preheader_edge (outer_loop),
5004                                UNKNOWN_LOCATION);
5005                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
5006                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5007                   if (dump_enabled_p ())
5008                     {
5009                       dump_printf_loc (MSG_NOTE, vect_location,
5010                                        "created double reduction phi node: ");
5011                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5012                     }
5013
5014                   vect_phi_res = PHI_RESULT (vect_phi);
5015
5016                   /* Replace the use, i.e., set the correct vs1 in the regular
5017                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
5018                      loop is redundant.  */
5019                   use = reduction_phi;
5020                   for (j = 0; j < ncopies; j++)
5021                     {
5022                       edge pr_edge = loop_preheader_edge (loop);
5023                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
5024                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
5025                     }
5026                 }
5027             }
5028         }
5029
5030       phis.release ();
5031       if (nested_in_vect_loop)
5032         {
5033           if (double_reduc)
5034             loop = outer_loop;
5035           else
5036             continue;
5037         }
5038
5039       phis.create (3);
5040       /* Find the loop-closed-use at the loop exit of the original scalar
5041          result.  (The reduction result is expected to have two immediate uses,
5042          one at the latch block, and one at the loop exit).  For double
5043          reductions we are looking for exit phis of the outer loop.  */
5044       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5045         {
5046           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5047             {
5048               if (!is_gimple_debug (USE_STMT (use_p)))
5049                 phis.safe_push (USE_STMT (use_p));
5050             }
5051           else
5052             {
5053               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
5054                 {
5055                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
5056
5057                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
5058                     {
5059                       if (!flow_bb_inside_loop_p (loop,
5060                                              gimple_bb (USE_STMT (phi_use_p)))
5061                           && !is_gimple_debug (USE_STMT (phi_use_p)))
5062                         phis.safe_push (USE_STMT (phi_use_p));
5063                     }
5064                 }
5065             }
5066         }
5067
5068       FOR_EACH_VEC_ELT (phis, i, exit_phi)
5069         {
5070           /* Replace the uses:  */
5071           orig_name = PHI_RESULT (exit_phi);
5072           scalar_result = scalar_results[k];
5073           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5074             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
5075               SET_USE (use_p, scalar_result);
5076         }
5077
5078       phis.release ();
5079     }
5080 }
5081
5082
5083 /* Function is_nonwrapping_integer_induction.
5084
5085    Check if STMT (which is part of loop LOOP) both increments and
5086    does not cause overflow.  */
5087
5088 static bool
5089 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop)
5090 {
5091   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
5092   tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5093   tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5094   tree lhs_type = TREE_TYPE (gimple_phi_result (stmt));
5095   widest_int ni, max_loop_value, lhs_max;
5096   bool overflow = false;
5097
5098   /* Make sure the loop is integer based.  */
5099   if (TREE_CODE (base) != INTEGER_CST
5100       || TREE_CODE (step) != INTEGER_CST)
5101     return false;
5102
5103   /* Check that the induction increments.  */
5104   if (tree_int_cst_sgn (step) == -1)
5105     return false;
5106
5107   /* Check that the max size of the loop will not wrap.  */
5108
5109   if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5110     return true;
5111
5112   if (! max_stmt_executions (loop, &ni))
5113     return false;
5114
5115   max_loop_value = wi::mul (wi::to_widest (step), ni, TYPE_SIGN (lhs_type),
5116                             &overflow);
5117   if (overflow)
5118     return false;
5119
5120   max_loop_value = wi::add (wi::to_widest (base), max_loop_value,
5121                             TYPE_SIGN (lhs_type), &overflow);
5122   if (overflow)
5123     return false;
5124
5125   return (wi::min_precision (max_loop_value, TYPE_SIGN (lhs_type))
5126           <= TYPE_PRECISION (lhs_type));
5127 }
5128
5129 /* Function vectorizable_reduction.
5130
5131    Check if STMT performs a reduction operation that can be vectorized.
5132    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5133    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5134    Return FALSE if not a vectorizable STMT, TRUE otherwise.
5135
5136    This function also handles reduction idioms (patterns) that have been
5137    recognized in advance during vect_pattern_recog.  In this case, STMT may be
5138    of this form:
5139      X = pattern_expr (arg0, arg1, ..., X)
5140    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5141    sequence that had been detected and replaced by the pattern-stmt (STMT).
5142
5143    This function also handles reduction of condition expressions, for example:
5144      for (int i = 0; i < N; i++)
5145        if (a[i] < value)
5146          last = a[i];
5147    This is handled by vectorising the loop and creating an additional vector
5148    containing the loop indexes for which "a[i] < value" was true.  In the
5149    function epilogue this is reduced to a single max value and then used to
5150    index into the vector of results.
5151
5152    In some cases of reduction patterns, the type of the reduction variable X is
5153    different than the type of the other arguments of STMT.
5154    In such cases, the vectype that is used when transforming STMT into a vector
5155    stmt is different than the vectype that is used to determine the
5156    vectorization factor, because it consists of a different number of elements
5157    than the actual number of elements that are being operated upon in parallel.
5158
5159    For example, consider an accumulation of shorts into an int accumulator.
5160    On some targets it's possible to vectorize this pattern operating on 8
5161    shorts at a time (hence, the vectype for purposes of determining the
5162    vectorization factor should be V8HI); on the other hand, the vectype that
5163    is used to create the vector form is actually V4SI (the type of the result).
5164
5165    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
5166    indicates what is the actual level of parallelism (V8HI in the example), so
5167    that the right vectorization factor would be derived.  This vectype
5168    corresponds to the type of arguments to the reduction stmt, and should *NOT*
5169    be used to create the vectorized stmt.  The right vectype for the vectorized
5170    stmt is obtained from the type of the result X:
5171         get_vectype_for_scalar_type (TREE_TYPE (X))
5172
5173    This means that, contrary to "regular" reductions (or "regular" stmts in
5174    general), the following equation:
5175       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5176    does *NOT* necessarily hold for reduction patterns.  */
5177
5178 bool
5179 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
5180                         gimple **vec_stmt, slp_tree slp_node)
5181 {
5182   tree vec_dest;
5183   tree scalar_dest;
5184   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
5185   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5186   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5187   tree vectype_in = NULL_TREE;
5188   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5189   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5190   enum tree_code code, orig_code, epilog_reduc_code;
5191   machine_mode vec_mode;
5192   int op_type;
5193   optab optab, reduc_optab;
5194   tree new_temp = NULL_TREE;
5195   gimple *def_stmt;
5196   enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
5197   gphi *new_phi = NULL;
5198   tree scalar_type;
5199   bool is_simple_use;
5200   gimple *orig_stmt;
5201   stmt_vec_info orig_stmt_info;
5202   tree expr = NULL_TREE;
5203   int i;
5204   int ncopies;
5205   int epilog_copies;
5206   stmt_vec_info prev_stmt_info, prev_phi_info;
5207   bool single_defuse_cycle = false;
5208   tree reduc_def = NULL_TREE;
5209   gimple *new_stmt = NULL;
5210   int j;
5211   tree ops[3];
5212   bool nested_cycle = false, found_nested_cycle_def = false;
5213   gimple *reduc_def_stmt = NULL;
5214   bool double_reduc = false;
5215   basic_block def_bb;
5216   struct loop * def_stmt_loop, *outer_loop = NULL;
5217   tree def_arg;
5218   gimple *def_arg_stmt;
5219   auto_vec<tree> vec_oprnds0;
5220   auto_vec<tree> vec_oprnds1;
5221   auto_vec<tree> vect_defs;
5222   auto_vec<gimple *> phis;
5223   int vec_num;
5224   tree def0, def1, tem, op1 = NULL_TREE;
5225   bool first_p = true;
5226   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5227   tree cond_reduc_val = NULL_TREE;
5228
5229   /* In case of reduction chain we switch to the first stmt in the chain, but
5230      we don't update STMT_INFO, since only the last stmt is marked as reduction
5231      and has reduction properties.  */
5232   if (GROUP_FIRST_ELEMENT (stmt_info)
5233       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
5234     {
5235       stmt = GROUP_FIRST_ELEMENT (stmt_info);
5236       first_p = false;
5237     }
5238
5239   if (nested_in_vect_loop_p (loop, stmt))
5240     {
5241       outer_loop = loop;
5242       loop = loop->inner;
5243       nested_cycle = true;
5244     }
5245
5246   /* 1. Is vectorizable reduction?  */
5247   /* Not supportable if the reduction variable is used in the loop, unless
5248      it's a reduction chain.  */
5249   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5250       && !GROUP_FIRST_ELEMENT (stmt_info))
5251     return false;
5252
5253   /* Reductions that are not used even in an enclosing outer-loop,
5254      are expected to be "live" (used out of the loop).  */
5255   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5256       && !STMT_VINFO_LIVE_P (stmt_info))
5257     return false;
5258
5259   /* Make sure it was already recognized as a reduction computation.  */
5260   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
5261       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
5262     return false;
5263
5264   /* 2. Has this been recognized as a reduction pattern?
5265
5266      Check if STMT represents a pattern that has been recognized
5267      in earlier analysis stages.  For stmts that represent a pattern,
5268      the STMT_VINFO_RELATED_STMT field records the last stmt in
5269      the original sequence that constitutes the pattern.  */
5270
5271   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
5272   if (orig_stmt)
5273     {
5274       orig_stmt_info = vinfo_for_stmt (orig_stmt);
5275       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5276       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5277     }
5278
5279   /* 3. Check the operands of the operation.  The first operands are defined
5280         inside the loop body. The last operand is the reduction variable,
5281         which is defined by the loop-header-phi.  */
5282
5283   gcc_assert (is_gimple_assign (stmt));
5284
5285   /* Flatten RHS.  */
5286   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5287     {
5288     case GIMPLE_SINGLE_RHS:
5289       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
5290       if (op_type == ternary_op)
5291         {
5292           tree rhs = gimple_assign_rhs1 (stmt);
5293           ops[0] = TREE_OPERAND (rhs, 0);
5294           ops[1] = TREE_OPERAND (rhs, 1);
5295           ops[2] = TREE_OPERAND (rhs, 2);
5296           code = TREE_CODE (rhs);
5297         }
5298       else
5299         return false;
5300       break;
5301
5302     case GIMPLE_BINARY_RHS:
5303       code = gimple_assign_rhs_code (stmt);
5304       op_type = TREE_CODE_LENGTH (code);
5305       gcc_assert (op_type == binary_op);
5306       ops[0] = gimple_assign_rhs1 (stmt);
5307       ops[1] = gimple_assign_rhs2 (stmt);
5308       break;
5309
5310     case GIMPLE_TERNARY_RHS:
5311       code = gimple_assign_rhs_code (stmt);
5312       op_type = TREE_CODE_LENGTH (code);
5313       gcc_assert (op_type == ternary_op);
5314       ops[0] = gimple_assign_rhs1 (stmt);
5315       ops[1] = gimple_assign_rhs2 (stmt);
5316       ops[2] = gimple_assign_rhs3 (stmt);
5317       break;
5318
5319     case GIMPLE_UNARY_RHS:
5320       return false;
5321
5322     default:
5323       gcc_unreachable ();
5324     }
5325   /* By default the reduction variable is the last operand of the statement.  */
5326   int reduc_index = op_type - 1;
5327   if (code == MINUS_EXPR)
5328     reduc_index = 0;
5329
5330   if (code == COND_EXPR && slp_node)
5331     return false;
5332
5333   scalar_dest = gimple_assign_lhs (stmt);
5334   scalar_type = TREE_TYPE (scalar_dest);
5335   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5336       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5337     return false;
5338
5339   /* Do not try to vectorize bit-precision reductions.  */
5340   if ((TYPE_PRECISION (scalar_type)
5341        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5342     return false;
5343
5344   /* All uses but the last are expected to be defined in the loop.
5345      The last use is the reduction variable.  In case of nested cycle this
5346      assumption is not true: we use reduc_index to record the index of the
5347      reduction variable.  */
5348   for (i = 0; i < op_type; i++)
5349     {
5350       if (i == reduc_index)
5351         continue;
5352
5353       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5354       if (i == 0 && code == COND_EXPR)
5355         continue;
5356
5357       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
5358                                           &def_stmt, &dt, &tem);
5359       if (!vectype_in)
5360         vectype_in = tem;
5361       gcc_assert (is_simple_use);
5362
5363       if (dt != vect_internal_def
5364           && dt != vect_external_def
5365           && dt != vect_constant_def
5366           && dt != vect_induction_def
5367           && !(dt == vect_nested_cycle && nested_cycle))
5368         return false;
5369
5370       if (dt == vect_nested_cycle)
5371         {
5372           found_nested_cycle_def = true;
5373           reduc_def_stmt = def_stmt;
5374           reduc_index = i;
5375         }
5376
5377       if (i == 1 && code == COND_EXPR)
5378         {
5379           /* Record how value of COND_EXPR is defined.  */
5380           if (dt == vect_constant_def)
5381             {
5382               cond_reduc_dt = dt;
5383               cond_reduc_val = ops[i];
5384             }
5385           if (dt == vect_induction_def && def_stmt != NULL
5386               && is_nonwrapping_integer_induction (def_stmt, loop))
5387             cond_reduc_dt = dt;
5388         }
5389     }
5390
5391   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
5392                                       &def_stmt, &dt, &tem);
5393   if (!vectype_in)
5394     vectype_in = tem;
5395   gcc_assert (is_simple_use);
5396   if (!found_nested_cycle_def)
5397     reduc_def_stmt = def_stmt;
5398
5399   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5400     return false;
5401
5402   if (!(dt == vect_reduction_def
5403         || dt == vect_nested_cycle
5404         || ((dt == vect_internal_def || dt == vect_external_def
5405              || dt == vect_constant_def || dt == vect_induction_def)
5406             && nested_cycle && found_nested_cycle_def)))
5407     {
5408       /* For pattern recognized stmts, orig_stmt might be a reduction,
5409          but some helper statements for the pattern might not, or
5410          might be COND_EXPRs with reduction uses in the condition.  */
5411       gcc_assert (orig_stmt);
5412       return false;
5413     }
5414
5415   stmt_vec_info reduc_def_info = vinfo_for_stmt (reduc_def_stmt);
5416   enum vect_reduction_type v_reduc_type
5417     = STMT_VINFO_REDUC_TYPE (reduc_def_info);
5418   gimple *tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
5419
5420   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5421   /* If we have a condition reduction, see if we can simplify it further.  */
5422   if (v_reduc_type == COND_REDUCTION)
5423     {
5424       if (cond_reduc_dt == vect_induction_def)
5425         {
5426           if (dump_enabled_p ())
5427             dump_printf_loc (MSG_NOTE, vect_location,
5428                              "condition expression based on "
5429                              "integer induction.\n");
5430           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5431             = INTEGER_INDUC_COND_REDUCTION;
5432         }
5433
5434       /* Loop peeling modifies the initial value of the reduction PHI, so
5435          the reduction stmt seen at transform time differs from the stmt
5436          that was analyzed.  We therefore record the reduction code for
5437          CONST_COND_REDUCTION reductions at the analysis stage, so that
5438          it can be used directly at the transform stage.  */
5439       if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
5440           || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
5441         {
5442           /* Also set the reduction type to CONST_COND_REDUCTION.  */
5443           gcc_assert (cond_reduc_dt == vect_constant_def);
5444           STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
5445         }
5446       else if (cond_reduc_dt == vect_constant_def)
5447         {
5448           enum vect_def_type cond_initial_dt;
5449           gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
5450           tree cond_initial_val
5451             = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
5452
5453           gcc_assert (cond_reduc_val != NULL_TREE);
5454           vect_is_simple_use (cond_initial_val, loop_vinfo,
5455                               &def_stmt, &cond_initial_dt);
5456           if (cond_initial_dt == vect_constant_def
5457               && types_compatible_p (TREE_TYPE (cond_initial_val),
5458                                      TREE_TYPE (cond_reduc_val)))
5459             {
5460               tree e = fold_build2 (LE_EXPR, boolean_type_node,
5461                                     cond_initial_val, cond_reduc_val);
5462               if (e && (integer_onep (e) || integer_zerop (e)))
5463                 {
5464                   if (dump_enabled_p ())
5465                     dump_printf_loc (MSG_NOTE, vect_location,
5466                                      "condition expression based on "
5467                                      "compile time constant.\n");
5468                   /* Record reduction code at analysis stage.  */
5469                   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
5470                     = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
5471                   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5472                     = CONST_COND_REDUCTION;
5473                 }
5474             }
5475         }
5476     }
5477
5478   if (orig_stmt)
5479     gcc_assert (tmp == orig_stmt
5480                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5481   else
5482     /* We changed STMT to be the first stmt in reduction chain, hence we
5483        check that in this case the first element in the chain is STMT.  */
5484     gcc_assert (stmt == tmp
5485                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5486
5487   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5488     return false;
5489
5490   if (slp_node)
5491     ncopies = 1;
5492   else
5493     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5494                / TYPE_VECTOR_SUBPARTS (vectype_in));
5495
5496   gcc_assert (ncopies >= 1);
5497
5498   vec_mode = TYPE_MODE (vectype_in);
5499
5500   if (code == COND_EXPR)
5501     {
5502       /* Only call during the analysis stage, otherwise we'll lose
5503          STMT_VINFO_TYPE.  */
5504       if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
5505                                                 ops[reduc_index], 0, NULL))
5506         {
5507           if (dump_enabled_p ())
5508             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5509                              "unsupported condition in reduction\n");
5510           return false;
5511         }
5512     }
5513   else
5514     {
5515       /* 4. Supportable by target?  */
5516
5517       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5518           || code == LROTATE_EXPR || code == RROTATE_EXPR)
5519         {
5520           /* Shifts and rotates are only supported by vectorizable_shifts,
5521              not vectorizable_reduction.  */
5522           if (dump_enabled_p ())
5523             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5524                              "unsupported shift or rotation.\n");
5525           return false;
5526         }
5527
5528       /* 4.1. check support for the operation in the loop  */
5529       optab = optab_for_tree_code (code, vectype_in, optab_default);
5530       if (!optab)
5531         {
5532           if (dump_enabled_p ())
5533             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5534                              "no optab.\n");
5535
5536           return false;
5537         }
5538
5539       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5540         {
5541           if (dump_enabled_p ())
5542             dump_printf (MSG_NOTE, "op not supported by target.\n");
5543
5544           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5545               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5546                   < vect_min_worthwhile_factor (code))
5547             return false;
5548
5549           if (dump_enabled_p ())
5550             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5551         }
5552
5553       /* Worthwhile without SIMD support?  */
5554       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5555           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5556              < vect_min_worthwhile_factor (code))
5557         {
5558           if (dump_enabled_p ())
5559             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5560                              "not worthwhile without SIMD support.\n");
5561
5562           return false;
5563         }
5564     }
5565
5566   /* 4.2. Check support for the epilog operation.
5567
5568           If STMT represents a reduction pattern, then the type of the
5569           reduction variable may be different than the type of the rest
5570           of the arguments.  For example, consider the case of accumulation
5571           of shorts into an int accumulator; The original code:
5572                         S1: int_a = (int) short_a;
5573           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
5574
5575           was replaced with:
5576                         STMT: int_acc = widen_sum <short_a, int_acc>
5577
5578           This means that:
5579           1. The tree-code that is used to create the vector operation in the
5580              epilog code (that reduces the partial results) is not the
5581              tree-code of STMT, but is rather the tree-code of the original
5582              stmt from the pattern that STMT is replacing.  I.e, in the example
5583              above we want to use 'widen_sum' in the loop, but 'plus' in the
5584              epilog.
5585           2. The type (mode) we use to check available target support
5586              for the vector operation to be created in the *epilog*, is
5587              determined by the type of the reduction variable (in the example
5588              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
5589              However the type (mode) we use to check available target support
5590              for the vector operation to be created *inside the loop*, is
5591              determined by the type of the other arguments to STMT (in the
5592              example we'd check this: optab_handler (widen_sum_optab,
5593              vect_short_mode)).
5594
5595           This is contrary to "regular" reductions, in which the types of all
5596           the arguments are the same as the type of the reduction variable.
5597           For "regular" reductions we can therefore use the same vector type
5598           (and also the same tree-code) when generating the epilog code and
5599           when generating the code inside the loop.  */
5600
5601   if (orig_stmt)
5602     {
5603       /* This is a reduction pattern: get the vectype from the type of the
5604          reduction variable, and get the tree-code from orig_stmt.  */
5605       gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5606                   == TREE_CODE_REDUCTION);
5607       orig_code = gimple_assign_rhs_code (orig_stmt);
5608       gcc_assert (vectype_out);
5609       vec_mode = TYPE_MODE (vectype_out);
5610     }
5611   else
5612     {
5613       /* Regular reduction: the same vectype and tree-code as used for
5614          the vector code inside the loop can be used for the epilog code.  */
5615       orig_code = code;
5616
5617       if (code == MINUS_EXPR)
5618         orig_code = PLUS_EXPR;
5619
5620       /* For simple condition reductions, replace with the actual expression
5621          we want to base our reduction around.  */
5622       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
5623         {
5624           orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
5625           gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
5626         }
5627       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5628                  == INTEGER_INDUC_COND_REDUCTION)
5629         orig_code = MAX_EXPR;
5630     }
5631
5632   if (nested_cycle)
5633     {
5634       def_bb = gimple_bb (reduc_def_stmt);
5635       def_stmt_loop = def_bb->loop_father;
5636       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5637                                        loop_preheader_edge (def_stmt_loop));
5638       if (TREE_CODE (def_arg) == SSA_NAME
5639           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5640           && gimple_code (def_arg_stmt) == GIMPLE_PHI
5641           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5642           && vinfo_for_stmt (def_arg_stmt)
5643           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5644               == vect_double_reduction_def)
5645         double_reduc = true;
5646     }
5647
5648   epilog_reduc_code = ERROR_MARK;
5649
5650   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
5651     {
5652       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5653         {
5654           reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5655                                          optab_default);
5656           if (!reduc_optab)
5657             {
5658               if (dump_enabled_p ())
5659                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5660                                  "no optab for reduction.\n");
5661
5662               epilog_reduc_code = ERROR_MARK;
5663             }
5664           else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5665             {
5666               if (dump_enabled_p ())
5667                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5668                                  "reduc op not supported by target.\n");
5669
5670               epilog_reduc_code = ERROR_MARK;
5671             }
5672
5673           /* When epilog_reduc_code is ERROR_MARK then a reduction will be
5674              generated in the epilog using multiple expressions.  This does not
5675              work for condition reductions.  */
5676           if (epilog_reduc_code == ERROR_MARK
5677               && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5678                         == INTEGER_INDUC_COND_REDUCTION
5679                   || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5680                         == CONST_COND_REDUCTION))
5681             {
5682               if (dump_enabled_p ())
5683                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5684                                  "no reduc code for scalar code.\n");
5685               return false;
5686             }
5687         }
5688       else
5689         {
5690           if (!nested_cycle || double_reduc)
5691             {
5692               if (dump_enabled_p ())
5693                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5694                                  "no reduc code for scalar code.\n");
5695
5696               return false;
5697             }
5698         }
5699     }
5700   else
5701     {
5702       int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
5703       cr_index_scalar_type = make_unsigned_type (scalar_precision);
5704       cr_index_vector_type = build_vector_type
5705         (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
5706
5707       epilog_reduc_code = REDUC_MAX_EXPR;
5708       optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
5709                                    optab_default);
5710       if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
5711           == CODE_FOR_nothing)
5712         {
5713           if (dump_enabled_p ())
5714             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5715                              "reduc max op not supported by target.\n");
5716           return false;
5717         }
5718     }
5719
5720   if ((double_reduc
5721        || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
5722       && ncopies > 1)
5723     {
5724       if (dump_enabled_p ())
5725         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5726                          "multiple types in double reduction or condition "
5727                          "reduction.\n");
5728       return false;
5729     }
5730
5731   /* In case of widening multiplication by a constant, we update the type
5732      of the constant to be the type of the other operand.  We check that the
5733      constant fits the type in the pattern recognition pass.  */
5734   if (code == DOT_PROD_EXPR
5735       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5736     {
5737       if (TREE_CODE (ops[0]) == INTEGER_CST)
5738         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5739       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5740         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5741       else
5742         {
5743           if (dump_enabled_p ())
5744             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5745                              "invalid types in dot-prod\n");
5746
5747           return false;
5748         }
5749     }
5750
5751   if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
5752     {
5753       widest_int ni;
5754
5755       if (! max_loop_iterations (loop, &ni))
5756         {
5757           if (dump_enabled_p ())
5758             dump_printf_loc (MSG_NOTE, vect_location,
5759                              "loop count not known, cannot create cond "
5760                              "reduction.\n");
5761           return false;
5762         }
5763       /* Convert backedges to iterations.  */
5764       ni += 1;
5765
5766       /* The additional index will be the same type as the condition.  Check
5767          that the loop can fit into this less one (because we'll use up the
5768          zero slot for when there are no matches).  */
5769       tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
5770       if (wi::geu_p (ni, wi::to_widest (max_index)))
5771         {
5772           if (dump_enabled_p ())
5773             dump_printf_loc (MSG_NOTE, vect_location,
5774                              "loop size is greater than data size.\n");
5775           return false;
5776         }
5777     }
5778
5779   if (!vec_stmt) /* transformation not required.  */
5780     {
5781       if (first_p
5782           && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5783                                          reduc_index))
5784         return false;
5785       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5786       return true;
5787     }
5788
5789   /* Transform.  */
5790
5791   if (dump_enabled_p ())
5792     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5793
5794   /* FORNOW: Multiple types are not supported for condition.  */
5795   if (code == COND_EXPR)
5796     gcc_assert (ncopies == 1);
5797
5798   /* Create the destination vector  */
5799   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5800
5801   /* In case the vectorization factor (VF) is bigger than the number
5802      of elements that we can fit in a vectype (nunits), we have to generate
5803      more than one vector stmt - i.e - we need to "unroll" the
5804      vector stmt by a factor VF/nunits.  For more details see documentation
5805      in vectorizable_operation.  */
5806
5807   /* If the reduction is used in an outer loop we need to generate
5808      VF intermediate results, like so (e.g. for ncopies=2):
5809         r0 = phi (init, r0)
5810         r1 = phi (init, r1)
5811         r0 = x0 + r0;
5812         r1 = x1 + r1;
5813     (i.e. we generate VF results in 2 registers).
5814     In this case we have a separate def-use cycle for each copy, and therefore
5815     for each copy we get the vector def for the reduction variable from the
5816     respective phi node created for this copy.
5817
5818     Otherwise (the reduction is unused in the loop nest), we can combine
5819     together intermediate results, like so (e.g. for ncopies=2):
5820         r = phi (init, r)
5821         r = x0 + r;
5822         r = x1 + r;
5823    (i.e. we generate VF/2 results in a single register).
5824    In this case for each copy we get the vector def for the reduction variable
5825    from the vectorized reduction operation generated in the previous iteration.
5826   */
5827
5828   if (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
5829     {
5830       single_defuse_cycle = true;
5831       epilog_copies = 1;
5832     }
5833   else
5834     epilog_copies = ncopies;
5835
5836   prev_stmt_info = NULL;
5837   prev_phi_info = NULL;
5838   if (slp_node)
5839     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5840   else
5841     {
5842       vec_num = 1;
5843       vec_oprnds0.create (1);
5844       if (op_type == ternary_op)
5845         vec_oprnds1.create (1);
5846     }
5847
5848   phis.create (vec_num);
5849   vect_defs.create (vec_num);
5850   if (!slp_node)
5851     vect_defs.quick_push (NULL_TREE);
5852
5853   for (j = 0; j < ncopies; j++)
5854     {
5855       if (j == 0 || !single_defuse_cycle)
5856         {
5857           for (i = 0; i < vec_num; i++)
5858             {
5859               /* Create the reduction-phi that defines the reduction
5860                  operand.  */
5861               new_phi = create_phi_node (vec_dest, loop->header);
5862               set_vinfo_for_stmt (new_phi,
5863                                   new_stmt_vec_info (new_phi, loop_vinfo));
5864                if (j == 0 || slp_node)
5865                  phis.quick_push (new_phi);
5866             }
5867         }
5868
5869       if (code == COND_EXPR)
5870         {
5871           gcc_assert (!slp_node);
5872           vectorizable_condition (stmt, gsi, vec_stmt,
5873                                   PHI_RESULT (phis[0]),
5874                                   reduc_index, NULL);
5875           /* Multiple types are not supported for condition.  */
5876           break;
5877         }
5878
5879       /* Handle uses.  */
5880       if (j == 0)
5881         {
5882           if (slp_node)
5883             {
5884               /* Get vec defs for all the operands except the reduction index,
5885                  ensuring the ordering of the ops in the vector is kept.  */
5886               auto_vec<tree, 3> slp_ops;
5887               auto_vec<vec<tree>, 3> vec_defs;
5888
5889               slp_ops.quick_push (reduc_index == 0 ? NULL : ops[0]);
5890               slp_ops.quick_push (reduc_index == 1 ? NULL : ops[1]);
5891               if (op_type == ternary_op)
5892                 slp_ops.quick_push (reduc_index == 2 ? NULL : ops[2]);
5893
5894               vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1);
5895
5896               vec_oprnds0.safe_splice (vec_defs[reduc_index == 0 ? 1 : 0]);
5897               vec_defs[reduc_index == 0 ? 1 : 0].release ();
5898               if (op_type == ternary_op)
5899                 {
5900                   vec_oprnds1.safe_splice (vec_defs[reduc_index == 2 ? 1 : 2]);
5901                   vec_defs[reduc_index == 2 ? 1 : 2].release ();
5902                 }
5903             }
5904           else
5905             {
5906               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5907                                                             stmt);
5908               vec_oprnds0.quick_push (loop_vec_def0);
5909               if (op_type == ternary_op)
5910                {
5911                  op1 = reduc_index == 0 ? ops[2] : ops[1];
5912                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
5913                  vec_oprnds1.quick_push (loop_vec_def1);
5914                }
5915             }
5916         }
5917       else
5918         {
5919           if (!slp_node)
5920             {
5921               enum vect_def_type dt;
5922               gimple *dummy_stmt;
5923
5924               vect_is_simple_use (ops[!reduc_index], loop_vinfo,
5925                                   &dummy_stmt, &dt);
5926               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5927                                                               loop_vec_def0);
5928               vec_oprnds0[0] = loop_vec_def0;
5929               if (op_type == ternary_op)
5930                 {
5931                   vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
5932                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5933                                                                 loop_vec_def1);
5934                   vec_oprnds1[0] = loop_vec_def1;
5935                 }
5936             }
5937
5938           if (single_defuse_cycle)
5939             reduc_def = gimple_assign_lhs (new_stmt);
5940
5941           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5942         }
5943
5944       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5945         {
5946           if (slp_node)
5947             reduc_def = PHI_RESULT (phis[i]);
5948           else
5949             {
5950               if (!single_defuse_cycle || j == 0)
5951                 reduc_def = PHI_RESULT (new_phi);
5952             }
5953
5954           def1 = ((op_type == ternary_op)
5955                   ? vec_oprnds1[i] : NULL);
5956           if (op_type == binary_op)
5957             {
5958               if (reduc_index == 0)
5959                 expr = build2 (code, vectype_out, reduc_def, def0);
5960               else
5961                 expr = build2 (code, vectype_out, def0, reduc_def);
5962             }
5963           else
5964             {
5965               if (reduc_index == 0)
5966                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5967               else
5968                 {
5969                   if (reduc_index == 1)
5970                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5971                   else
5972                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5973                 }
5974             }
5975
5976           new_stmt = gimple_build_assign (vec_dest, expr);
5977           new_temp = make_ssa_name (vec_dest, new_stmt);
5978           gimple_assign_set_lhs (new_stmt, new_temp);
5979           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5980
5981           if (slp_node)
5982             {
5983               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5984               vect_defs.quick_push (new_temp);
5985             }
5986           else
5987             vect_defs[0] = new_temp;
5988         }
5989
5990       if (slp_node)
5991         continue;
5992
5993       if (j == 0)
5994         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5995       else
5996         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5997
5998       prev_stmt_info = vinfo_for_stmt (new_stmt);
5999       prev_phi_info = vinfo_for_stmt (new_phi);
6000     }
6001
6002   tree indx_before_incr, indx_after_incr, cond_name = NULL;
6003
6004   /* Finalize the reduction-phi (set its arguments) and create the
6005      epilog reduction code.  */
6006   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6007     {
6008       new_temp = gimple_assign_lhs (*vec_stmt);
6009       vect_defs[0] = new_temp;
6010
6011       /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
6012          which is updated with the current index of the loop for every match of
6013          the original loop's cond_expr (VEC_STMT).  This results in a vector
6014          containing the last time the condition passed for that vector lane.
6015          The first match will be a 1 to allow 0 to be used for non-matching
6016          indexes.  If there are no matches at all then the vector will be all
6017          zeroes.  */
6018       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
6019         {
6020           int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6021           int k;
6022
6023           gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
6024
6025           /* First we create a simple vector induction variable which starts
6026              with the values {1,2,3,...} (SERIES_VECT) and increments by the
6027              vector size (STEP).  */
6028
6029           /* Create a {1,2,3,...} vector.  */
6030           tree *vtemp = XALLOCAVEC (tree, nunits_out);
6031           for (k = 0; k < nunits_out; ++k)
6032             vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
6033           tree series_vect = build_vector (cr_index_vector_type, vtemp);
6034
6035           /* Create a vector of the step value.  */
6036           tree step = build_int_cst (cr_index_scalar_type, nunits_out);
6037           tree vec_step = build_vector_from_val (cr_index_vector_type, step);
6038
6039           /* Create an induction variable.  */
6040           gimple_stmt_iterator incr_gsi;
6041           bool insert_after;
6042           standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6043           create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
6044                      insert_after, &indx_before_incr, &indx_after_incr);
6045
6046           /* Next create a new phi node vector (NEW_PHI_TREE) which starts
6047              filled with zeros (VEC_ZERO).  */
6048
6049           /* Create a vector of 0s.  */
6050           tree zero = build_zero_cst (cr_index_scalar_type);
6051           tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
6052
6053           /* Create a vector phi node.  */
6054           tree new_phi_tree = make_ssa_name (cr_index_vector_type);
6055           new_phi = create_phi_node (new_phi_tree, loop->header);
6056           set_vinfo_for_stmt (new_phi,
6057                               new_stmt_vec_info (new_phi, loop_vinfo));
6058           add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
6059                        UNKNOWN_LOCATION);
6060
6061           /* Now take the condition from the loops original cond_expr
6062              (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
6063              every match uses values from the induction variable
6064              (INDEX_BEFORE_INCR) otherwise uses values from the phi node
6065              (NEW_PHI_TREE).
6066              Finally, we update the phi (NEW_PHI_TREE) to take the value of
6067              the new cond_expr (INDEX_COND_EXPR).  */
6068
6069           /* Duplicate the condition from vec_stmt.  */
6070           tree ccompare = unshare_expr (gimple_assign_rhs1 (*vec_stmt));
6071
6072           /* Create a conditional, where the condition is taken from vec_stmt
6073              (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
6074              else is the phi (NEW_PHI_TREE).  */
6075           tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
6076                                          ccompare, indx_before_incr,
6077                                          new_phi_tree);
6078           cond_name = make_ssa_name (cr_index_vector_type);
6079           gimple *index_condition = gimple_build_assign (cond_name,
6080                                                          index_cond_expr);
6081           gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
6082           stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
6083                                                             loop_vinfo);
6084           STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
6085           set_vinfo_for_stmt (index_condition, index_vec_info);
6086
6087           /* Update the phi with the vec cond.  */
6088           add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
6089                        UNKNOWN_LOCATION);
6090         }
6091     }
6092
6093   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
6094                                     epilog_reduc_code, phis, reduc_index,
6095                                     double_reduc, slp_node, cond_name);
6096
6097   return true;
6098 }
6099
6100 /* Function vect_min_worthwhile_factor.
6101
6102    For a loop where we could vectorize the operation indicated by CODE,
6103    return the minimum vectorization factor that makes it worthwhile
6104    to use generic vectors.  */
6105 int
6106 vect_min_worthwhile_factor (enum tree_code code)
6107 {
6108   switch (code)
6109     {
6110     case PLUS_EXPR:
6111     case MINUS_EXPR:
6112     case NEGATE_EXPR:
6113       return 4;
6114
6115     case BIT_AND_EXPR:
6116     case BIT_IOR_EXPR:
6117     case BIT_XOR_EXPR:
6118     case BIT_NOT_EXPR:
6119       return 2;
6120
6121     default:
6122       return INT_MAX;
6123     }
6124 }
6125
6126
6127 /* Function vectorizable_induction
6128
6129    Check if PHI performs an induction computation that can be vectorized.
6130    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6131    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6132    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6133
6134 bool
6135 vectorizable_induction (gimple *phi,
6136                         gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6137                         gimple **vec_stmt, slp_tree slp_node)
6138 {
6139   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6140   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6141   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6142   unsigned ncopies;
6143   bool nested_in_vect_loop = false;
6144   struct loop *iv_loop;
6145   tree vec_def;
6146   edge pe = loop_preheader_edge (loop);
6147   basic_block new_bb;
6148   tree new_vec, vec_init, vec_step, t;
6149   tree new_name;
6150   gimple *new_stmt;
6151   gphi *induction_phi;
6152   tree induc_def, vec_dest;
6153   tree init_expr, step_expr;
6154   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6155   unsigned i;
6156   tree expr;
6157   gimple_seq stmts;
6158   imm_use_iterator imm_iter;
6159   use_operand_p use_p;
6160   gimple *exit_phi;
6161   edge latch_e;
6162   tree loop_arg;
6163   gimple_stmt_iterator si;
6164   basic_block bb = gimple_bb (phi);
6165
6166   if (gimple_code (phi) != GIMPLE_PHI)
6167     return false;
6168
6169   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6170     return false;
6171
6172   /* Make sure it was recognized as induction computation.  */
6173   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
6174     return false;
6175
6176   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6177   unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
6178
6179   if (slp_node)
6180     ncopies = 1;
6181   else
6182     ncopies = vf / nunits;
6183   gcc_assert (ncopies >= 1);
6184
6185   /* FORNOW. These restrictions should be relaxed.  */
6186   if (nested_in_vect_loop_p (loop, phi))
6187     {
6188       imm_use_iterator imm_iter;
6189       use_operand_p use_p;
6190       gimple *exit_phi;
6191       edge latch_e;
6192       tree loop_arg;
6193
6194       if (ncopies > 1)
6195         {
6196           if (dump_enabled_p ())
6197             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6198                              "multiple types in nested loop.\n");
6199           return false;
6200         }
6201
6202       /* FORNOW: outer loop induction with SLP not supported.  */
6203       if (STMT_SLP_TYPE (stmt_info))
6204         return false;
6205
6206       exit_phi = NULL;
6207       latch_e = loop_latch_edge (loop->inner);
6208       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6209       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6210         {
6211           gimple *use_stmt = USE_STMT (use_p);
6212           if (is_gimple_debug (use_stmt))
6213             continue;
6214
6215           if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
6216             {
6217               exit_phi = use_stmt;
6218               break;
6219             }
6220         }
6221       if (exit_phi)
6222         {
6223           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
6224           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6225                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6226             {
6227               if (dump_enabled_p ())
6228                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6229                                  "inner-loop induction only used outside "
6230                                  "of the outer vectorized loop.\n");
6231               return false;
6232             }
6233         }
6234
6235       nested_in_vect_loop = true;
6236       iv_loop = loop->inner;
6237     }
6238   else
6239     iv_loop = loop;
6240   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
6241
6242   if (!vec_stmt) /* transformation not required.  */
6243     {
6244       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6245       if (dump_enabled_p ())
6246         dump_printf_loc (MSG_NOTE, vect_location,
6247                          "=== vectorizable_induction ===\n");
6248       vect_model_induction_cost (stmt_info, ncopies);
6249       return true;
6250     }
6251
6252   /* Transform.  */
6253
6254   /* Compute a vector variable, initialized with the first VF values of
6255      the induction variable.  E.g., for an iv with IV_PHI='X' and
6256      evolution S, for a vector of 4 units, we want to compute:
6257      [X, X + S, X + 2*S, X + 3*S].  */
6258
6259   if (dump_enabled_p ())
6260     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
6261
6262   latch_e = loop_latch_edge (iv_loop);
6263   loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
6264
6265   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
6266   gcc_assert (step_expr != NULL_TREE);
6267
6268   pe = loop_preheader_edge (iv_loop);
6269   init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
6270                                      loop_preheader_edge (iv_loop));
6271
6272   /* Convert the step to the desired type.  */
6273   stmts = NULL;
6274   step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
6275   if (stmts)
6276     {
6277       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6278       gcc_assert (!new_bb);
6279     }
6280
6281   /* Find the first insertion point in the BB.  */
6282   si = gsi_after_labels (bb);
6283
6284   /* For SLP induction we have to generate several IVs as for example
6285      with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
6286      [i + 2*S, i + 3*S, i + 3*S, i + 3*S].  The step is the same uniform
6287      [VF*S, VF*S, VF*S, VF*S] for all.  */
6288   if (slp_node)
6289     {
6290       /* Convert the init to the desired type.  */
6291       stmts = NULL;
6292       init_expr = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6293       if (stmts)
6294         {
6295           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6296           gcc_assert (!new_bb);
6297         }
6298
6299       /* Generate [VF*S, VF*S, ... ].  */
6300       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6301         {
6302           expr = build_int_cst (integer_type_node, vf);
6303           expr = fold_convert (TREE_TYPE (step_expr), expr);
6304         }
6305       else
6306         expr = build_int_cst (TREE_TYPE (step_expr), vf);
6307       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6308                               expr, step_expr);
6309       if (! CONSTANT_CLASS_P (new_name))
6310         new_name = vect_init_vector (phi, new_name,
6311                                      TREE_TYPE (step_expr), NULL);
6312       new_vec = build_vector_from_val (vectype, new_name);
6313       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6314
6315       /* Now generate the IVs.  */
6316       unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6317       unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6318       unsigned elts = nunits * nvects;
6319       unsigned nivs = least_common_multiple (group_size, nunits) / nunits;
6320       gcc_assert (elts % group_size == 0);
6321       tree elt = init_expr;
6322       unsigned ivn;
6323       for (ivn = 0; ivn < nivs; ++ivn)
6324         {
6325           tree *elts = XALLOCAVEC (tree, nunits);
6326           bool constant_p = true;
6327           for (unsigned eltn = 0; eltn < nunits; ++eltn)
6328             {
6329               if (ivn*nunits + eltn >= group_size
6330                   && (ivn*nunits + eltn) % group_size == 0)
6331                 {
6332                   stmts = NULL;
6333                   elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
6334                                       elt, step_expr);
6335                   if (stmts)
6336                     {
6337                       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6338                       gcc_assert (!new_bb);
6339                     }
6340                 }
6341               if (! CONSTANT_CLASS_P (elt))
6342                 constant_p = false;
6343               elts[eltn] = elt;
6344             }
6345           if (constant_p)
6346             new_vec = build_vector (vectype, elts);
6347           else
6348             {
6349               vec<constructor_elt, va_gc> *v;
6350               vec_alloc (v, nunits);
6351               for (i = 0; i < nunits; ++i)
6352                 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
6353               new_vec = build_constructor (vectype, v);
6354             }
6355           vec_init = vect_init_vector (phi, new_vec, vectype, NULL);
6356
6357           /* Create the induction-phi that defines the induction-operand.  */
6358           vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6359           induction_phi = create_phi_node (vec_dest, iv_loop->header);
6360           set_vinfo_for_stmt (induction_phi,
6361                               new_stmt_vec_info (induction_phi, loop_vinfo));
6362           induc_def = PHI_RESULT (induction_phi);
6363
6364           /* Create the iv update inside the loop  */
6365           vec_def = make_ssa_name (vec_dest);
6366           new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6367           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6368           set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
6369
6370           /* Set the arguments of the phi node:  */
6371           add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6372           add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6373                        UNKNOWN_LOCATION);
6374
6375           SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
6376         }
6377
6378       /* Re-use IVs when we can.  */
6379       if (ivn < nvects)
6380         {
6381           unsigned vfp
6382             = least_common_multiple (group_size, nunits) / group_size;
6383           /* Generate [VF'*S, VF'*S, ... ].  */
6384           if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6385             {
6386               expr = build_int_cst (integer_type_node, vfp);
6387               expr = fold_convert (TREE_TYPE (step_expr), expr);
6388             }
6389           else
6390             expr = build_int_cst (TREE_TYPE (step_expr), vfp);
6391           new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6392                                   expr, step_expr);
6393           if (! CONSTANT_CLASS_P (new_name))
6394             new_name = vect_init_vector (phi, new_name,
6395                                          TREE_TYPE (step_expr), NULL);
6396           new_vec = build_vector_from_val (vectype, new_name);
6397           vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6398           for (; ivn < nvects; ++ivn)
6399             {
6400               gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
6401               tree def;
6402               if (gimple_code (iv) == GIMPLE_PHI)
6403                 def = gimple_phi_result (iv);
6404               else
6405                 def = gimple_assign_lhs (iv);
6406               new_stmt = gimple_build_assign (make_ssa_name (vectype),
6407                                               PLUS_EXPR,
6408                                               def, vec_step);
6409               if (gimple_code (iv) == GIMPLE_PHI)
6410                 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6411               else
6412                 {
6413                   gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
6414                   gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING);
6415                 }
6416               set_vinfo_for_stmt (new_stmt,
6417                                   new_stmt_vec_info (new_stmt, loop_vinfo));
6418               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6419             }
6420         }
6421
6422       return true;
6423     }
6424
6425   /* Create the vector that holds the initial_value of the induction.  */
6426   if (nested_in_vect_loop)
6427     {
6428       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
6429          been created during vectorization of previous stmts.  We obtain it
6430          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
6431       vec_init = vect_get_vec_def_for_operand (init_expr, phi);
6432       /* If the initial value is not of proper type, convert it.  */
6433       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
6434         {
6435           new_stmt
6436             = gimple_build_assign (vect_get_new_ssa_name (vectype,
6437                                                           vect_simple_var,
6438                                                           "vec_iv_"),
6439                                    VIEW_CONVERT_EXPR,
6440                                    build1 (VIEW_CONVERT_EXPR, vectype,
6441                                            vec_init));
6442           vec_init = gimple_assign_lhs (new_stmt);
6443           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
6444                                                  new_stmt);
6445           gcc_assert (!new_bb);
6446           set_vinfo_for_stmt (new_stmt,
6447                               new_stmt_vec_info (new_stmt, loop_vinfo));
6448         }
6449     }
6450   else
6451     {
6452       vec<constructor_elt, va_gc> *v;
6453
6454       /* iv_loop is the loop to be vectorized. Create:
6455          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
6456       stmts = NULL;
6457       new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6458
6459       vec_alloc (v, nunits);
6460       bool constant_p = is_gimple_min_invariant (new_name);
6461       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
6462       for (i = 1; i < nunits; i++)
6463         {
6464           /* Create: new_name_i = new_name + step_expr  */
6465           new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
6466                                    new_name, step_expr);
6467           if (!is_gimple_min_invariant (new_name))
6468             constant_p = false;
6469           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
6470         }
6471       if (stmts)
6472         {
6473           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6474           gcc_assert (!new_bb);
6475         }
6476
6477       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
6478       if (constant_p)
6479         new_vec = build_vector_from_ctor (vectype, v);
6480       else
6481         new_vec = build_constructor (vectype, v);
6482       vec_init = vect_init_vector (phi, new_vec, vectype, NULL);
6483     }
6484
6485
6486   /* Create the vector that holds the step of the induction.  */
6487   if (nested_in_vect_loop)
6488     /* iv_loop is nested in the loop to be vectorized. Generate:
6489        vec_step = [S, S, S, S]  */
6490     new_name = step_expr;
6491   else
6492     {
6493       /* iv_loop is the loop to be vectorized. Generate:
6494           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
6495       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6496         {
6497           expr = build_int_cst (integer_type_node, vf);
6498           expr = fold_convert (TREE_TYPE (step_expr), expr);
6499         }
6500       else
6501         expr = build_int_cst (TREE_TYPE (step_expr), vf);
6502       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6503                               expr, step_expr);
6504       if (TREE_CODE (step_expr) == SSA_NAME)
6505         new_name = vect_init_vector (phi, new_name,
6506                                      TREE_TYPE (step_expr), NULL);
6507     }
6508
6509   t = unshare_expr (new_name);
6510   gcc_assert (CONSTANT_CLASS_P (new_name)
6511               || TREE_CODE (new_name) == SSA_NAME);
6512   new_vec = build_vector_from_val (vectype, t);
6513   vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6514
6515
6516   /* Create the following def-use cycle:
6517      loop prolog:
6518          vec_init = ...
6519          vec_step = ...
6520      loop:
6521          vec_iv = PHI <vec_init, vec_loop>
6522          ...
6523          STMT
6524          ...
6525          vec_loop = vec_iv + vec_step;  */
6526
6527   /* Create the induction-phi that defines the induction-operand.  */
6528   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6529   induction_phi = create_phi_node (vec_dest, iv_loop->header);
6530   set_vinfo_for_stmt (induction_phi,
6531                       new_stmt_vec_info (induction_phi, loop_vinfo));
6532   induc_def = PHI_RESULT (induction_phi);
6533
6534   /* Create the iv update inside the loop  */
6535   vec_def = make_ssa_name (vec_dest);
6536   new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6537   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6538   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
6539
6540   /* Set the arguments of the phi node:  */
6541   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6542   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6543                UNKNOWN_LOCATION);
6544
6545   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi;
6546
6547   /* In case that vectorization factor (VF) is bigger than the number
6548      of elements that we can fit in a vectype (nunits), we have to generate
6549      more than one vector stmt - i.e - we need to "unroll" the
6550      vector stmt by a factor VF/nunits.  For more details see documentation
6551      in vectorizable_operation.  */
6552
6553   if (ncopies > 1)
6554     {
6555       stmt_vec_info prev_stmt_vinfo;
6556       /* FORNOW. This restriction should be relaxed.  */
6557       gcc_assert (!nested_in_vect_loop);
6558
6559       /* Create the vector that holds the step of the induction.  */
6560       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6561         {
6562           expr = build_int_cst (integer_type_node, nunits);
6563           expr = fold_convert (TREE_TYPE (step_expr), expr);
6564         }
6565       else
6566         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
6567       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6568                               expr, step_expr);
6569       if (TREE_CODE (step_expr) == SSA_NAME)
6570         new_name = vect_init_vector (phi, new_name,
6571                                      TREE_TYPE (step_expr), NULL);
6572       t = unshare_expr (new_name);
6573       gcc_assert (CONSTANT_CLASS_P (new_name)
6574                   || TREE_CODE (new_name) == SSA_NAME);
6575       new_vec = build_vector_from_val (vectype, t);
6576       vec_step = vect_init_vector (phi, new_vec, vectype, NULL);
6577
6578       vec_def = induc_def;
6579       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
6580       for (i = 1; i < ncopies; i++)
6581         {
6582           /* vec_i = vec_prev + vec_step  */
6583           new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
6584                                           vec_def, vec_step);
6585           vec_def = make_ssa_name (vec_dest, new_stmt);
6586           gimple_assign_set_lhs (new_stmt, vec_def);
6587
6588           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6589           set_vinfo_for_stmt (new_stmt,
6590                               new_stmt_vec_info (new_stmt, loop_vinfo));
6591           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
6592           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
6593         }
6594     }
6595
6596   if (nested_in_vect_loop)
6597     {
6598       /* Find the loop-closed exit-phi of the induction, and record
6599          the final vector of induction results:  */
6600       exit_phi = NULL;
6601       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
6602         {
6603           gimple *use_stmt = USE_STMT (use_p);
6604           if (is_gimple_debug (use_stmt))
6605             continue;
6606
6607           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
6608             {
6609               exit_phi = use_stmt;
6610               break;
6611             }
6612         }
6613       if (exit_phi)
6614         {
6615           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
6616           /* FORNOW. Currently not supporting the case that an inner-loop induction
6617              is not used in the outer-loop (i.e. only outside the outer-loop).  */
6618           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
6619                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
6620
6621           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
6622           if (dump_enabled_p ())
6623             {
6624               dump_printf_loc (MSG_NOTE, vect_location,
6625                                "vector of inductions after inner-loop:");
6626               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
6627             }
6628         }
6629     }
6630
6631
6632   if (dump_enabled_p ())
6633     {
6634       dump_printf_loc (MSG_NOTE, vect_location,
6635                        "transform induction: created def-use cycle: ");
6636       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
6637       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6638                         SSA_NAME_DEF_STMT (vec_def), 0);
6639     }
6640
6641   return true;
6642 }
6643
6644 /* Function vectorizable_live_operation.
6645
6646    STMT computes a value that is used outside the loop.  Check if
6647    it can be supported.  */
6648
6649 bool
6650 vectorizable_live_operation (gimple *stmt,
6651                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6652                              slp_tree slp_node, int slp_index,
6653                              gimple **vec_stmt)
6654 {
6655   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6656   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6657   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6658   imm_use_iterator imm_iter;
6659   tree lhs, lhs_type, bitsize, vec_bitsize;
6660   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6661   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6662   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6663   gimple *use_stmt;
6664   auto_vec<tree> vec_oprnds;
6665
6666   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
6667
6668   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
6669     return false;
6670
6671   /* FORNOW.  CHECKME.  */
6672   if (nested_in_vect_loop_p (loop, stmt))
6673     return false;
6674
6675   /* If STMT is not relevant and it is a simple assignment and its inputs are
6676      invariant then it can remain in place, unvectorized.  The original last
6677      scalar value that it computes will be used.  */
6678   if (!STMT_VINFO_RELEVANT_P (stmt_info))
6679     {
6680       gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo));
6681       if (dump_enabled_p ())
6682         dump_printf_loc (MSG_NOTE, vect_location,
6683                          "statement is simple and uses invariant.  Leaving in "
6684                          "place.\n");
6685       return true;
6686     }
6687
6688   if (!vec_stmt)
6689     /* No transformation required.  */
6690     return true;
6691
6692   /* If stmt has a related stmt, then use that for getting the lhs.  */
6693   if (is_pattern_stmt_p (stmt_info))
6694     stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6695
6696   lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
6697         : gimple_get_lhs (stmt);
6698   lhs_type = TREE_TYPE (lhs);
6699
6700   bitsize = TYPE_SIZE (TREE_TYPE (vectype));
6701   vec_bitsize = TYPE_SIZE (vectype);
6702
6703   /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
6704   tree vec_lhs, bitstart;
6705   if (slp_node)
6706     {
6707       gcc_assert (slp_index >= 0);
6708
6709       int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6710       int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6711
6712       /* Get the last occurrence of the scalar index from the concatenation of
6713          all the slp vectors. Calculate which slp vector it is and the index
6714          within.  */
6715       int pos = (num_vec * nunits) - num_scalar + slp_index;
6716       int vec_entry = pos / nunits;
6717       int vec_index = pos % nunits;
6718
6719       /* Get the correct slp vectorized stmt.  */
6720       vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]);
6721
6722       /* Get entry to use.  */
6723       bitstart = build_int_cst (unsigned_type_node, vec_index);
6724       bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
6725     }
6726   else
6727     {
6728       enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
6729       vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt);
6730
6731       /* For multiple copies, get the last copy.  */
6732       for (int i = 1; i < ncopies; ++i)
6733         vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type,
6734                                                   vec_lhs);
6735
6736       /* Get the last lane in the vector.  */
6737       bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
6738     }
6739
6740   /* Create a new vectorized stmt for the uses of STMT and insert outside the
6741      loop.  */
6742   gimple_seq stmts = NULL;
6743   tree bftype = TREE_TYPE (vectype);
6744   if (VECTOR_BOOLEAN_TYPE_P (vectype))
6745     bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
6746   tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
6747   new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts,
6748                                    true, NULL_TREE);
6749   if (stmts)
6750     gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
6751
6752   /* Replace use of lhs with newly computed result.  If the use stmt is a
6753      single arg PHI, just replace all uses of PHI result.  It's necessary
6754      because lcssa PHI defining lhs may be before newly inserted stmt.  */
6755   use_operand_p use_p;
6756   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
6757     if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
6758         && !is_gimple_debug (use_stmt))
6759     {
6760       if (gimple_code (use_stmt) == GIMPLE_PHI
6761           && gimple_phi_num_args (use_stmt) == 1)
6762         {
6763           replace_uses_by (gimple_phi_result (use_stmt), new_tree);
6764         }
6765       else
6766         {
6767           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6768             SET_USE (use_p, new_tree);
6769         }
6770       update_stmt (use_stmt);
6771     }
6772
6773   return true;
6774 }
6775
6776 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
6777
6778 static void
6779 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
6780 {
6781   ssa_op_iter op_iter;
6782   imm_use_iterator imm_iter;
6783   def_operand_p def_p;
6784   gimple *ustmt;
6785
6786   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
6787     {
6788       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
6789         {
6790           basic_block bb;
6791
6792           if (!is_gimple_debug (ustmt))
6793             continue;
6794
6795           bb = gimple_bb (ustmt);
6796
6797           if (!flow_bb_inside_loop_p (loop, bb))
6798             {
6799               if (gimple_debug_bind_p (ustmt))
6800                 {
6801                   if (dump_enabled_p ())
6802                     dump_printf_loc (MSG_NOTE, vect_location,
6803                                      "killing debug use\n");
6804
6805                   gimple_debug_bind_reset_value (ustmt);
6806                   update_stmt (ustmt);
6807                 }
6808               else
6809                 gcc_unreachable ();
6810             }
6811         }
6812     }
6813 }
6814
6815 /* Return true when LOOP_VINFO_NITERS of LOOP_VINFO, which equals
6816    LOOP_VINFO_NITERSM1 + 1, is known to be computed without wrapping
6817    around, and false when that cannot be shown.  */
6818
6819 static bool
6820 loop_niters_no_overflow (loop_vec_info loop_vinfo)
6821 {
6822   tree niters = LOOP_VINFO_NITERS (loop_vinfo);
6823
6824   /* With a compile-time constant count, NITERSM1 + 1 did not wrap exactly
6825      when NITERSM1 is still smaller than NITERS.  */
6826   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6827     {
6828       tree nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
6829
6830       gcc_assert (TREE_CODE (niters) == INTEGER_CST);
6831       gcc_assert (TREE_CODE (nitersm1) == INTEGER_CST);
6832       if (wi::to_widest (nitersm1) < wi::to_widest (niters))
6833         return true;
6834     }
6835
6836   /* Otherwise use the upper bound reported by get_max_loop_iterations:
6837      the answer is yes if it is below the maximum of NITERS' type.  */
6838   widest_int max_iters;
6839   if (!get_max_loop_iterations (LOOP_VINFO_LOOP (loop_vinfo), &max_iters))
6840     return false;
6841
6842   tree niters_type = TREE_TYPE (niters);
6843   widest_int limit
6844     = widest_int::from (wi::max_value (niters_type), TYPE_SIGN (niters_type));
6845   return max_iters < limit;
6846 }
6847
6848 /* Update the profile of LOOP, vectorized by factor VF: rescale the body to
6849    the reduced iteration estimate and redo the exit and latch edges.  */
6850
6851 static void
6852 scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
6853 {
6854   edge entry = loop_preheader_edge (loop);
6855   /* Estimated iteration count once reduced by the vectorization factor.  */
6856   gcov_type niter_est = niter_for_unrolled_loop (loop, vf);
6857   profile_count header_cnt = loop->header->count;
6858   profile_count entry_cnt = entry->count;
6859
6860   /* Fall back to frequencies when neither count is positive.  */
6861   if (!(header_cnt > 0 || entry_cnt > 0))
6862     {
6863       header_cnt = profile_count::from_gcov_type (loop->header->frequency);
6864       entry_cnt = profile_count::from_gcov_type (EDGE_FREQUENCY (entry));
6865     }
6866   if (header_cnt > 0)
6867     {
6868       /* A preheader count below 1 must not zero out the loop body.  */
6869       profile_count one = profile_count::from_gcov_type (1);
6870       if (!(entry_cnt > one))
6871         entry_cnt = one;
6872       /* This should not overflow.  */
6873       gcov_type scale
6874         = entry_cnt.apply_scale (niter_est + 1, 1).probability_in (header_cnt);
6875       scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
6876     }
6877
6878   basic_block latch_pred = single_pred (loop->latch);
6879   edge exit_edge = single_exit (loop);
6880   exit_edge->count = loop_preheader_edge (loop)->count;
6881   exit_edge->probability = REG_BR_PROB_BASE / (niter_est + 1);
6882
6883   edge latch_e = single_pred_edge (loop->latch);
6884   int old_prob = latch_e->probability;
6885   latch_e->probability = REG_BR_PROB_BASE - exit_edge->probability;
6886   latch_e->count = latch_pred->count - exit_edge->count;
6887   if (old_prob > 0)
6888     scale_bbs_frequencies_int (&loop->latch, 1, latch_e->probability, old_prob);
6889 }
6890
6891 /* Function vect_transform_loop.
6892
6893    The analysis phase has determined that the loop is vectorizable.
6894    Vectorize the loop - create vectorized stmts to replace the scalar
6895    stmts in the loop, and update the loop exit condition.
6896    Returns scalar epilogue loop if any.

        LOOP_VINFO is the analysis record of the loop being transformed.
        The steps, in order: make the exit destination single-predecessor,
        version the loop if required, peel it, transform every relevant or
        live PHI and statement of every block (scheduling the SLP instances
        at the first SLP stmt met), make the loop iterate NITERS_VECTOR
        times, scale the profile and the recorded iteration bounds, and free
        the SLP instances.

        The return value is NULL when LOOP_VINFO itself describes an
        epilogue, when PARAM_VECT_EPILOGUES_NOMASK is zero, when the target's
        vector-size mask has no bit below CURRENT_VECTOR_SIZE, or when the
        known number of leftover iterations is too small.  Otherwise it is
        the loop returned by vect_do_peeling, with its vectorization flags
        copied from LOOP and if-converted when a scalar loop copy exists.  */
6897
6898 struct loop *
6899 vect_transform_loop (loop_vec_info loop_vinfo)
6900 {
6901   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6902   struct loop *epilogue = NULL;
6903   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6904   int nbbs = loop->num_nodes;
6905   int i;
6906   tree niters_vector = NULL;
6907   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6908   bool grouped_store;
6909   bool slp_scheduled = false;
6910   gimple *stmt, *pattern_stmt;
       /* Cursor over the def sequence of the pattern stmt currently being
          handled; both variables persist across iterations of the statement
          loop below.  */
6911   gimple_seq pattern_def_seq = NULL;
6912   gimple_stmt_iterator pattern_def_si = gsi_none ();
       /* True while the pattern stmt related to the scalar stmt at the
          current iterator position still has to be transformed.  */
6913   bool transform_pattern_stmt = false;
6914   bool check_profitability = false;
6915   int th;
6916
6917   if (dump_enabled_p ())
6918     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
6919
6920   /* Use the more conservative vectorization threshold.  If the number
6921      of iterations is constant assume the cost check has been performed
6922      by our caller.  If the threshold makes all loops profitable that
6923      run at least the vectorization factor number of times checking
6924      is pointless, too.  */
6925   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
6926   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
6927       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6928     {
6929       if (dump_enabled_p ())
6930         dump_printf_loc (MSG_NOTE, vect_location,
6931                          "Profitability threshold is %d loop iterations.\n",
6932                          th);
6933       check_profitability = true;
6934     }
6935
6936   /* Make sure there exists a single-predecessor exit bb.  Do this before
6937      versioning.   */
6938   edge e = single_exit (loop);
6939   if (! single_pred_p (e->dest))
6940     {
6941       split_loop_exit_edge (e);
6942       if (dump_enabled_p ())
6943         dump_printf (MSG_NOTE, "split exit edge\n");
6944     }
6945
6946   /* Version the loop first, if required, so the profitability check
6947      comes first.  */
6948
6949   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
6950     {
6951       vect_loop_versioning (loop_vinfo, th, check_profitability);
         /* The check was handed to the versioning code; do not request it
            again from the peeling code below.  */
6952       check_profitability = false;
6953     }
6954
6955   /* Make sure there exists a single-predecessor exit bb also on the
6956      scalar loop copy.  Do this after versioning but before peeling
6957      so CFG structure is fine for both scalar and if-converted loop
6958      to make slpeel_duplicate_current_defs_from_edges face matched
6959      loop closed PHI nodes on the exit.  */
6960   if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
6961     {
6962       e = single_exit (LOOP_VINFO_SCALAR_LOOP (loop_vinfo));
6963       if (! single_pred_p (e->dest))
6964         {
6965           split_loop_exit_edge (e);
6966           if (dump_enabled_p ())
6967             dump_printf (MSG_NOTE, "split exit edge of scalar loop\n");
6968         }
6969     }
6970
       /* Build the iteration count and peel.  vect_do_peeling returns the
          epilogue loop (possibly NULL) and may fill in NITERS_VECTOR.  */
6971   tree niters = vect_build_loop_niters (loop_vinfo);
6972   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
6973   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
6974   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
6975   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
6976                               check_profitability, niters_no_overflow);
       /* If peeling did not provide the vector loop's iteration count,
          compute it here: a constant NITERS / VF when NITERS is known,
          otherwise whatever vect_gen_vector_loop_niters generates.  */
6977   if (niters_vector == NULL_TREE)
6978     {
6979       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6980         niters_vector
6981           = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6982                            LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
6983       else
6984         vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
6985                                      niters_no_overflow);
6986     }
6987
6988   /* 1) Make sure the loop header has exactly two entries
6989      2) Make sure we have a preheader basic block.  */
6990
6991   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6992
6993   split_edge (loop_preheader_edge (loop));
6994
6995   /* FORNOW: the vectorizer supports only loops which body consist
6996      of one basic block (header + empty latch). When the vectorizer will
6997      support more involved loop forms, the order by which the BBs are
6998      traversed need to be reconsidered.  */
6999
7000   for (i = 0; i < nbbs; i++)
7001     {
7002       basic_block bb = bbs[i];
7003       stmt_vec_info stmt_info;
7004
         /* First the PHIs of BB.  Only induction PHIs that are not pure SLP
            are transformed in this loop.  */
7005       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
7006            gsi_next (&si))
7007         {
7008           gphi *phi = si.phi ();
7009           if (dump_enabled_p ())
7010             {
7011               dump_printf_loc (MSG_NOTE, vect_location,
7012                                "------>vectorizing phi: ");
7013               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
7014             }
7015           stmt_info = vinfo_for_stmt (phi);
7016           if (!stmt_info)
7017             continue;
7018
7019           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7020             vect_loop_kill_debug_uses (loop, phi);
7021
7022           if (!STMT_VINFO_RELEVANT_P (stmt_info)
7023               && !STMT_VINFO_LIVE_P (stmt_info))
7024             continue;
7025
             /* Dump-only note: the PHI's vector type holds a number of
                elements different from VF.  */
7026           if (STMT_VINFO_VECTYPE (stmt_info)
7027               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7028                   != (unsigned HOST_WIDE_INT) vf)
7029               && dump_enabled_p ())
7030             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7031
7032           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
7033               && ! PURE_SLP_STMT (stmt_info))
7034             {
7035               if (dump_enabled_p ())
7036                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
7037               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
7038             }
7039         }
7040
         /* Then the ordinary statements.  SI is advanced explicitly inside
            the body: the same position is visited again while the pattern
            stmt of the scalar stmt at SI, or one of that pattern stmt's def
            stmts, is still pending.  That is why the loop also continues
            on TRANSFORM_PATTERN_STMT.  */
7041       pattern_stmt = NULL;
7042       for (gimple_stmt_iterator si = gsi_start_bb (bb);
7043            !gsi_end_p (si) || transform_pattern_stmt;)
7044         {
7045           bool is_store;
7046
7047           if (transform_pattern_stmt)
7048             stmt = pattern_stmt;
7049           else
7050             {
7051               stmt = gsi_stmt (si);
7052               /* During vectorization remove existing clobber stmts.  */
7053               if (gimple_clobber_p (stmt))
7054                 {
7055                   unlink_stmt_vdef (stmt);
7056                   gsi_remove (&si, true);
7057                   release_defs (stmt);
7058                   continue;
7059                 }
7060             }
7061
7062           if (dump_enabled_p ())
7063             {
7064               dump_printf_loc (MSG_NOTE, vect_location,
7065                                "------>vectorizing statement: ");
7066               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7067             }
7068
7069           stmt_info = vinfo_for_stmt (stmt);
7070
7071           /* vector stmts created in the outer-loop during vectorization of
7072              stmts in an inner-loop may not have a stmt_info, and do not
7073              need to be vectorized.  */
7074           if (!stmt_info)
7075             {
7076               gsi_next (&si);
7077               continue;
7078             }
7079
7080           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7081             vect_loop_kill_debug_uses (loop, stmt);
7082
             /* Two cases involve a pattern.  If STMT itself is neither
                relevant nor live but its related pattern stmt is, switch to
                the pattern stmt right away.  If STMT is relevant or live and
                so is its pattern stmt, transform STMT in this iteration and
                set TRANSFORM_PATTERN_STMT so the pattern stmt is picked up
                by the following iteration(s).  A stmt that is neither
                relevant nor live and has no such pattern stmt is skipped.  */
7083           if (!STMT_VINFO_RELEVANT_P (stmt_info)
7084               && !STMT_VINFO_LIVE_P (stmt_info))
7085             {
7086               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7087                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7088                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7089                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7090                 {
7091                   stmt = pattern_stmt;
7092                   stmt_info = vinfo_for_stmt (stmt);
7093                 }
7094               else
7095                 {
7096                   gsi_next (&si);
7097                   continue;
7098                 }
7099             }
7100           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7101                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7102                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7103                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7104             transform_pattern_stmt = true;
7105
7106           /* If pattern statement has def stmts, vectorize them too.  */
7107           if (is_pattern_stmt_p (stmt_info))
7108             {
                 /* On the first visit fetch the def sequence and start at
                    its head; on later visits step past the def stmt handled
                    by the previous iteration.  */
7109               if (pattern_def_seq == NULL)
7110                 {
7111                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
7112                   pattern_def_si = gsi_start (pattern_def_seq);
7113                 }
7114               else if (!gsi_end_p (pattern_def_si))
7115                 gsi_next (&pattern_def_si);
7116               if (pattern_def_seq != NULL)
7117                 {
7118                   gimple *pattern_def_stmt = NULL;
7119                   stmt_vec_info pattern_def_stmt_info = NULL;
7120
                     /* Find the next def stmt that is relevant or live.  */
7121                   while (!gsi_end_p (pattern_def_si))
7122                     {
7123                       pattern_def_stmt = gsi_stmt (pattern_def_si);
7124                       pattern_def_stmt_info
7125                         = vinfo_for_stmt (pattern_def_stmt);
7126                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
7127                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
7128                         break;
7129                       gsi_next (&pattern_def_si);
7130                     }
7131
                     /* Found one: transform it in this iteration in place of
                        the pattern stmt.  Otherwise the sequence is
                        exhausted and the pattern stmt itself is transformed
                        now.  */
7132                   if (!gsi_end_p (pattern_def_si))
7133                     {
7134                       if (dump_enabled_p ())
7135                         {
7136                           dump_printf_loc (MSG_NOTE, vect_location,
7137                                            "==> vectorizing pattern def "
7138                                            "stmt: ");
7139                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
7140                                             pattern_def_stmt, 0);
7141                         }
7142
7143                       stmt = pattern_def_stmt;
7144                       stmt_info = pattern_def_stmt_info;
7145                     }
7146                   else
7147                     {
7148                       pattern_def_si = gsi_none ();
7149                       transform_pattern_stmt = false;
7150                     }
7151                 }
7152               else
7153                 transform_pattern_stmt = false;
7154             }
7155
7156           if (STMT_VINFO_VECTYPE (stmt_info))
7157             {
7158               unsigned int nunits
7159                 = (unsigned int)
7160                   TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7161               if (!STMT_SLP_TYPE (stmt_info)
7162                   && nunits != (unsigned int) vf
7163                   && dump_enabled_p ())
7164                   /* For SLP VF is set according to unrolling factor, and not
7165                      to vector size, hence for SLP this print is not valid.  */
7166                 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7167             }
7168
7169           /* SLP. Schedule all the SLP instances when the first SLP stmt is
7170              reached.  */
7171           if (STMT_SLP_TYPE (stmt_info))
7172             {
7173               if (!slp_scheduled)
7174                 {
7175                   slp_scheduled = true;
7176
7177                   if (dump_enabled_p ())
7178                     dump_printf_loc (MSG_NOTE, vect_location,
7179                                      "=== scheduling SLP instances ===\n");
7180
7181                   vect_schedule_slp (loop_vinfo);
7182                 }
7183
7184               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
7185               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
7186                 {
                     /* Nothing more to do for this stmt here.  Advance SI
                        only when no pattern stmt or pattern def stmt is
                        pending for the current position.  */
7187                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7188                     {
7189                       pattern_def_seq = NULL;
7190                       gsi_next (&si);
7191                     }
7192                   continue;
7193                 }
7194             }
7195
7196           /* -------- vectorize statement ------------ */
7197           if (dump_enabled_p ())
7198             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
7199
7200           grouped_store = false;
7201           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
7202           if (is_store)
7203             {
7204               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7205                 {
7206                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7207                      interleaving chain was completed - free all the stores in
7208                      the chain.  */
                     /* NOTE(review): SI is advanced before the removal,
                        presumably so that it does not end up referring to a
                        removed store -- confirm against vect_remove_stores.  */
7209                   gsi_next (&si);
7210                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
7211                 }
7212               else
7213                 {
7214                   /* Free the attached stmt_vec_info and remove the stmt.  */
7215                   gimple *store = gsi_stmt (si);
7216                   free_stmt_vec_info (store);
7217                   unlink_stmt_vdef (store);
7218                   gsi_remove (&si, true);
7219                   release_defs (store);
7220                 }
7221
7222               /* Stores can only appear at the end of pattern statements.  */
7223               gcc_assert (!transform_pattern_stmt);
7224               pattern_def_seq = NULL;
7225             }
             /* Not a store: move on to the next stmt unless a pattern stmt
                or pattern def stmt is still pending for this position.  */
7226           else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7227             {
7228               pattern_def_seq = NULL;
7229               gsi_next (&si);
7230             }
7231         }                       /* stmts in BB */
7232     }                           /* BBs in loop */
7233
       /* Update the exit condition for NITERS_VECTOR iterations and adapt
          the profile to the vectorization factor.  */
7234   slpeel_make_loop_iterate_ntimes (loop, niters_vector);
7235
7236   scale_profile_for_vect_loop (loop, vf);
7237
7238   /* The minimum number of iterations performed by the epilogue.  This
7239      is 1 when peeling for gaps because we always need a final scalar
7240      iteration.  */
7241   int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
7242   /* +1 to convert latch counts to loop iteration counts,
7243      -min_epilogue_iters to remove iterations that cannot be performed
7244        by the vector code.  */
7245   int bias = 1 - min_epilogue_iters;
7246   /* In these calculations the "- 1" converts loop iteration counts
7247      back to latch counts.  */
7248   if (loop->any_upper_bound)
7249     loop->nb_iterations_upper_bound
7250       = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1;
7251   if (loop->any_likely_upper_bound)
7252     loop->nb_iterations_likely_upper_bound
7253       = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1;
7254   if (loop->any_estimate)
7255     loop->nb_iterations_estimate
7256       = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1;
7257
7258   if (dump_enabled_p ())
7259     {
7260       if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7261         {
7262           dump_printf_loc (MSG_NOTE, vect_location,
7263                            "LOOP VECTORIZED\n");
7264           if (loop->inner)
7265             dump_printf_loc (MSG_NOTE, vect_location,
7266                              "OUTER LOOP VECTORIZED\n");
7267           dump_printf (MSG_NOTE, "\n");
7268         }
7269       else
7270         dump_printf_loc (MSG_NOTE, vect_location,
7271                          "LOOP EPILOGUE VECTORIZED (VS=%d)\n",
7272                          current_vector_size);
7273     }
7274
7275   /* Free SLP instances here because otherwise stmt reference counting
7276      won't work.  */
7277   slp_instance instance;
7278   FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
7279     vect_free_slp_instance (instance);
7280   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
7281   /* Clear-up safelen field since its value is invalid after vectorization
7282      since vectorized loop can have loop-carried dependencies.  */
7283   loop->safelen = 0;
7284
7285   /* Don't vectorize epilogue for epilogue.  */
7286   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7287     epilogue = NULL;
7288
       /* Decide whether the epilogue is worth handing back for another
          vectorization attempt.  */
7289   if (epilogue)
7290     {
             /* Clear every bit at or above CURRENT_VECTOR_SIZE.
                NOTE(review): presumably VECTOR_SIZES is a bitmask of the
                vector sizes the target supports, so only sizes smaller than
                the current one remain -- confirm against the target hook.  */
7291         unsigned int vector_sizes
7292           = targetm.vectorize.autovectorize_vector_sizes ();
7293         vector_sizes &= current_vector_size - 1;
7294
7295         if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
7296           epilogue = NULL;
7297         else if (!vector_sizes)
7298           epilogue = NULL;
7299         else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7300                  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
7301           {
                 /* EITERS is the known iteration count minus the iterations
                    peeled for alignment, modulo VF.  RATIO is the current
                    vector size divided by the smallest size left in
                    VECTOR_SIZES.  */
7302             int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
7303             int ratio = current_vector_size / smallest_vec_size;
7304             int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
7305               - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
7306             eiters = eiters % vf;
7307
                 /* NOTE(review): the bound is stored without setting
                    any_upper_bound, and it becomes -1 should EITERS be 0 --
                    confirm both are intended.  */
7308             epilogue->nb_iterations_upper_bound = eiters - 1;
7309
                 /* Give up when fewer than VF / RATIO iterations remain.  */
7310             if (eiters < vf / ratio)
7311               epilogue = NULL;
7312             }
7313     }
7314
7315   if (epilogue)
7316     {
           /* LOOP->safelen was cleared above, so the epilogue receives 0.  */
7317       epilogue->force_vectorize = loop->force_vectorize;
7318       epilogue->safelen = loop->safelen;
7319       epilogue->dont_vectorize = false;
7320
7321       /* We may need to if-convert epilogue to vectorize it.  */
7322       if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
7323         tree_if_conversion (epilogue);
7324     }
7325
7326   return epilogue;
7327 }
7328
7329 /* The code below is trying to perform simple optimization - revert
7330    if-conversion for masked stores, i.e. if the mask of a store is zero
7331    do not perform it and all stored value producers also if possible.
7332    For example,
7333      for (i=0; i<n; i++)
7334        if (c[i])
7335         {
7336           p1[i] += 1;
7337           p2[i] = p3[i] +2;
7338         }
7339    this transformation will produce the following semi-hammock:
7340
7341    if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 })
7342      {
7343        vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165);
7344        vect__12.22_172 = vect__11.19_170 + vect_cst__171;
7345        MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172);
7346        vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165);
7347        vect__19.28_184 = vect__18.25_182 + vect_cst__183;
7348        MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184);
7349      }

        LOOP is the loop whose body is scanned for IFN_MASK_STORE calls.  A
        store may sit in a loop nested inside LOOP; the new block is added to
        the loop that contains the store.  Nothing is changed when the body
        holds no masked store.
7350 */
7351
7352 void
7353 optimize_mask_stores (struct loop *loop)
7354 {
7355   basic_block *bbs = get_loop_body (loop);
7356   unsigned nbbs = loop->num_nodes;
7357   unsigned i;
7358   basic_block bb;
7359   struct loop *bb_loop;
7360   gimple_stmt_iterator gsi;
7361   gimple *stmt;
7362   auto_vec<gimple *> worklist;
7363
7364   vect_location = find_loop_location (loop);
7365   /* Pick up all masked stores in loop if any.  */
       /* They are pushed in block and statement order and popped from the
          end below, so the store found last is processed first.  */
7366   for (i = 0; i < nbbs; i++)
7367     {
7368       bb = bbs[i];
7369       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
7370            gsi_next (&gsi))
7371         {
7372           stmt = gsi_stmt (gsi);
7373           if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
7374             worklist.safe_push (stmt);
7375         }
7376     }
7377
       /* The block array is not needed past this point.  */
7378   free (bbs);
7379   if (worklist.is_empty ())
7380     return;
7381
7382   /* Loop has masked stores.  */
       /* Each iteration creates one STORE_BB guarded by a test of one mask
          and sinks into it as many stores with that mask as possible.  */
7383   while (!worklist.is_empty ())
7384     {
7385       gimple *last, *last_store;
7386       edge e, efalse;
7387       tree mask;
7388       basic_block store_bb, join_bb;
7389       gimple_stmt_iterator gsi_to;
7390       tree vdef, new_vdef;
7391       gphi *phi;
7392       tree vectype;
7393       tree zero;
7394
7395       last = worklist.pop ();
           /* The mask is argument 2 of the IFN_MASK_STORE call.  */
7396       mask = gimple_call_arg (last, 2);
7397       bb = gimple_bb (last);
7398       /* Create then_bb and if-then structure in CFG, then_bb belongs to
7399          the same loop as if_bb.  It could be different to LOOP when two
7400          level loop-nest is vectorized and mask_store belongs to the inner
7401          one.  */
           /* E is the edge from BB to JOIN_BB, the block created by
              splitting BB at LAST.  It becomes the true edge of the
              "MASK == 0" test built below, i.e. the path that skips
              STORE_BB; EFALSE leads into STORE_BB.  */
7402       e = split_block (bb, last);
7403       bb_loop = bb->loop_father;
7404       gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
7405       join_bb = e->dest;
7406       store_bb = create_empty_bb (bb);
7407       add_bb_to_loop (store_bb, bb_loop);
7408       e->flags = EDGE_TRUE_VALUE;
7409       efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
7410       /* Put STORE_BB to likely part.  */
           /* NOTE(review): the comment above says "likely", yet the edge
              into STORE_BB is given PROB_UNLIKELY -- confirm which one is
              intended.  */
7411       efalse->probability = PROB_UNLIKELY;
7412       store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse);
7413       make_edge (store_bb, join_bb, EDGE_FALLTHRU);
7414       if (dom_info_available_p (CDI_DOMINATORS))
7415         set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
           /* NOTE(review): unlike the other dump messages in this function,
              this one has no trailing newline -- confirm that is intended.  */
7416       if (dump_enabled_p ())
7417         dump_printf_loc (MSG_NOTE, vect_location,
7418                          "Create new block %d to sink mask stores.",
7419                          store_bb->index);
7420       /* Create vector comparison with boolean result.  */
           /* The condition "MASK == 0" is appended at the end of BB.  */
7421       vectype = TREE_TYPE (mask);
7422       zero = build_zero_cst (vectype);
7423       stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
7424       gsi = gsi_last_bb (bb);
7425       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7426       /* Create new PHI node for vdef of the last masked store:
7427          .MEM_2 = VDEF <.MEM_1>
7428          will be converted to
7429          .MEM_3 = VDEF <.MEM_1>
7430          and new PHI node will be created in join bb
7431          .MEM_2 = PHI <.MEM_1, .MEM_3>
7432       */
           /* Only the argument for the edge leaving STORE_BB is added here.
              The argument for E is added after the inner loop, once the
              earliest store moved into STORE_BB is known.  */
7433       vdef = gimple_vdef (last);
7434       new_vdef = make_ssa_name (gimple_vop (cfun), last);
7435       gimple_set_vdef (last, new_vdef);
7436       phi = create_phi_node (vdef, join_bb);
7437       add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);
7438
7439       /* Put all masked stores with the same mask to STORE_BB if possible.  */
7440       while (true)
7441         {
7442           gimple_stmt_iterator gsi_from;
               /* The stmt at which the backward scan below stopped, or the
                  last one it moved when it ran off the start of the block;
                  NULL if it looked at no non-debug stmt.  */
7443           gimple *stmt1 = NULL;
7444
7445           /* Move masked store to STORE_BB.  */
7446           last_store = last;
7447           gsi = gsi_for_stmt (last);
7448           gsi_from = gsi;
7449           /* Shift GSI to the previous stmt for further traversal.  */
7450           gsi_prev (&gsi);
7451           gsi_to = gsi_start_bb (store_bb);
7452           gsi_move_before (&gsi_from, &gsi_to);
7453           /* Setup GSI_TO to the non-empty block start.  */
7454           gsi_to = gsi_start_bb (store_bb);
7455           if (dump_enabled_p ())
7456             {
7457               dump_printf_loc (MSG_NOTE, vect_location,
7458                                "Move stmt to created bb\n");
7459               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
7460             }
7461           /* Move all stored value producers if possible.  */
               /* Walk backwards from the stmt that preceded the store,
                  stopping at the first stmt that cannot be moved.  */
7462           while (!gsi_end_p (gsi))
7463             {
7464               tree lhs;
7465               imm_use_iterator imm_iter;
7466               use_operand_p use_p;
7467               bool res;
7468
7469               /* Skip debug statements.  */
7470               if (is_gimple_debug (gsi_stmt (gsi)))
7471                 {
7472                   gsi_prev (&gsi);
7473                   continue;
7474                 }
7475               stmt1 = gsi_stmt (gsi);
7476               /* Do not consider statements writing to memory or having
7477                  volatile operand.  */
7478               if (gimple_vdef (stmt1)
7479                   || gimple_has_volatile_ops (stmt1))
7480                 break;
                   /* Remember STMT1's position in GSI_FROM and step GSI back
                      first, so that moving or removing STMT1 does not
                      disturb the traversal.  */
7481               gsi_from = gsi;
7482               gsi_prev (&gsi);
7483               lhs = gimple_get_lhs (stmt1);
7484               if (!lhs)
7485                 break;
7486
7487               /* LHS of vectorized stmt must be SSA_NAME.  */
7488               if (TREE_CODE (lhs) != SSA_NAME)
7489                 break;
7490
7491               if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
7492                 {
7493                   /* Remove dead scalar statement.  */
7494                   if (has_zero_uses (lhs))
7495                     {
7496                       gsi_remove (&gsi_from, true);
7497                       continue;
7498                     }
7499                 }
7500
7501               /* Check that LHS does not have uses outside of STORE_BB.  */
7502               res = true;
7503               FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
7504                 {
7505                   gimple *use_stmt;
7506                   use_stmt = USE_STMT (use_p);
7507                   if (is_gimple_debug (use_stmt))
7508                     continue;
7509                   if (gimple_bb (use_stmt) != store_bb)
7510                     {
7511                       res = false;
7512                       break;
7513                     }
7514                 }
7515               if (!res)
7516                 break;
7517
                   /* A stmt that reads memory may only be moved if it reads
                      the same memory state (virtual use) as the store that
                      was moved last.  */
7518               if (gimple_vuse (stmt1)
7519                   && gimple_vuse (stmt1) != gimple_vuse (last_store))
7520                 break;
7521
7522               /* Can move STMT1 to STORE_BB.  */
7523               if (dump_enabled_p ())
7524                 {
7525                   dump_printf_loc (MSG_NOTE, vect_location,
7526                                    "Move stmt to created bb\n");
7527                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
7528                 }
7529               gsi_move_before (&gsi_from, &gsi_to);
7530               /* Shift GSI_TO for further insertion.  */
7531               gsi_prev (&gsi_to);
7532             }
7533           /* Put other masked stores with the same mask to STORE_BB.  */
               /* Go on only if the next store on the worklist uses the same
                  mask and is exactly the stmt held in STMT1.  */
7534           if (worklist.is_empty ()
7535               || gimple_call_arg (worklist.last (), 2) != mask
7536               || worklist.last () != stmt1)
7537             break;
7538           last = worklist.pop ();
7539         }
           /* On the path that skips STORE_BB the PHI takes the virtual use
              of the store that was moved last.  */
7540       add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION);
7541     }
7542 }