gcc/tree-vect-loop.c
1 /* Loop Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "rtl.h"
31 #include "ssa.h"
32 #include "alias.h"
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "cfganal.h"
36 #include "gimple-pretty-print.h"
37 #include "internal-fn.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-pass.h"
45 #include "cfgloop.h"
46 #include "flags.h"
47 #include "insn-codes.h"
48 #include "optabs-tree.h"
49 #include "params.h"
50 #include "diagnostic-core.h"
51 #include "tree-chrec.h"
52 #include "tree-scalar-evolution.h"
53 #include "tree-vectorizer.h"
54 #include "target.h"
56 /* Loop Vectorization Pass.
58 This pass tries to vectorize loops.
60 For example, the vectorizer transforms the following simple loop:
62 short a[N]; short b[N]; short c[N]; int i;
64 for (i=0; i<N; i++){
65 a[i] = b[i] + c[i];
68 as if it had been manually vectorized by rewriting the source code into:
70 typedef int __attribute__((mode(V8HI))) v8hi;
71 short a[N]; short b[N]; short c[N]; int i;
72 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
73 v8hi va, vb, vc;
75 for (i=0; i<N/8; i++){
76 vb = pb[i];
77 vc = pc[i];
78 va = vb + vc;
79 pa[i] = va;
82 The main entry to this pass is vectorize_loops(), in which
83 the vectorizer applies a set of analyses on a given set of loops,
84 followed by the actual vectorization transformation for the loops that
85 had successfully passed the analysis phase.
86 Throughout this pass we make a distinction between two types of
87 data: scalars (which are represented by SSA_NAMES), and memory references
88 ("data-refs"). These two types of data require different handling both
89 during analysis and transformation. The types of data-refs that the
90 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
91 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
92 accesses are required to have a simple (consecutive) access pattern.
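     For instance (illustrative only), the accesses in

       for (i=0; i<N; i++)
         a[i] = *(p + i);

     are a consecutive array store and a consecutive pointer load and fit
     this scheme, whereas an indirect access such as a[b[i]] does not.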
94 Analysis phase:
95 ===============
96 The driver for the analysis phase is vect_analyze_loop().
97 It applies a set of analyses, some of which rely on the scalar evolution
98 analyzer (scev) developed by Sebastian Pop.
100 During the analysis phase the vectorizer records some information
101 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
102 loop, as well as general information about the loop as a whole, which is
103 recorded in a "loop_vec_info" struct attached to each loop.
105 Transformation phase:
106 =====================
107 The loop transformation phase scans all the stmts in the loop, and
108 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
109 the loop that needs to be vectorized. It inserts the vector code sequence
110 just before the scalar stmt S, and records a pointer to the vector code
111 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
112 attached to S). This pointer will be used for the vectorization of following
113 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
114 otherwise, we rely on dead code elimination for removing it.
116 For example, say stmt S1 was vectorized into stmt VS1:
118 VS1: vb = px[i];
119 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
120 S2: a = b;
122 To vectorize stmt S2, the vectorizer first finds the stmt that defines
123 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
124 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
125 resulting sequence would be:
127 VS1: vb = px[i];
128 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
129 VS2: va = vb;
130 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
132 Operands that are not SSA_NAMEs are data-refs that appear in
133 load/store operations (like 'x[i]' in S1), and are handled differently.
135 Target modeling:
136 =================
137 Currently the only target specific information that is used is the
138 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
139 Targets that can support vectors of different sizes will, for now, need
140 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
141 flexibility will be added in the future.
143 Since we only vectorize operations whose vector form can be
144 expressed using existing tree codes, to verify that an operation is
145 supported, the vectorizer checks the relevant optab at the relevant
146 machine_mode (e.g., optab_handler (add_optab, V8HImode)). If
147 the value found is CODE_FOR_nothing, then there's no target support, and
148 we can't vectorize the stmt.
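     As a minimal illustrative sketch (not the exact code; the real checks
     live in the vectorizable_* routines), a target-support check for an
     addition on a vector type VECTYPE looks roughly like this:

       optab op = optab_for_tree_code (PLUS_EXPR, vectype, optab_default);
       if (!op || optab_handler (op, TYPE_MODE (vectype)) == CODE_FOR_nothing)
         return false;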
150 For additional information on this project see:
151 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
154 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
156 /* Function vect_determine_vectorization_factor
158 Determine the vectorization factor (VF). VF is the number of data elements
159 that are operated upon in parallel in a single iteration of the vectorized
160 loop. For example, when vectorizing a loop that operates on 4-byte elements,
161 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
162 elements can fit in a single vector register.
164 We currently support vectorization of loops in which all types operated upon
165 are of the same size. Therefore this function currently sets VF according to
166 the size of the types operated upon, and fails if there are multiple sizes
167 in the loop.
169 VF is also the factor by which the loop iterations are strip-mined, e.g.:
170 original loop:
171 for (i=0; i<N; i++){
172 a[i] = b[i] + c[i];
175 vectorized loop:
176 for (i=0; i<N; i+=VF){
177 a[i:VF] = b[i:VF] + c[i:VF];
181 static bool
182 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
185 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
186 int nbbs = loop->num_nodes;
187 unsigned int vectorization_factor = 0;
188 tree scalar_type;
189 gphi *phi;
190 tree vectype;
191 unsigned int nunits;
192 stmt_vec_info stmt_info;
193 int i;
194 HOST_WIDE_INT dummy;
195 gimple *stmt, *pattern_stmt = NULL;
196 gimple_seq pattern_def_seq = NULL;
197 gimple_stmt_iterator pattern_def_si = gsi_none ();
198 bool analyze_pattern_stmt = false;
200 if (dump_enabled_p ())
201 dump_printf_loc (MSG_NOTE, vect_location,
202 "=== vect_determine_vectorization_factor ===\n");
204 for (i = 0; i < nbbs; i++)
206 basic_block bb = bbs[i];
208 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
209 gsi_next (&si))
211 phi = si.phi ();
212 stmt_info = vinfo_for_stmt (phi);
213 if (dump_enabled_p ())
215 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
216 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
217 dump_printf (MSG_NOTE, "\n");
220 gcc_assert (stmt_info);
222 if (STMT_VINFO_RELEVANT_P (stmt_info))
224 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
225 scalar_type = TREE_TYPE (PHI_RESULT (phi));
227 if (dump_enabled_p ())
229 dump_printf_loc (MSG_NOTE, vect_location,
230 "get vectype for scalar type: ");
231 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
232 dump_printf (MSG_NOTE, "\n");
235 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (!vectype)
238 if (dump_enabled_p ())
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
241 "not vectorized: unsupported "
242 "data-type ");
243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
244 scalar_type);
245 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
247 return false;
249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
251 if (dump_enabled_p ())
253 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
254 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
255 dump_printf (MSG_NOTE, "\n");
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
261 nunits);
263 if (!vectorization_factor
264 || (nunits > vectorization_factor))
265 vectorization_factor = nunits;
269 for (gimple_stmt_iterator si = gsi_start_bb (bb);
270 !gsi_end_p (si) || analyze_pattern_stmt;)
272 tree vf_vectype;
274 if (analyze_pattern_stmt)
275 stmt = pattern_stmt;
276 else
277 stmt = gsi_stmt (si);
279 stmt_info = vinfo_for_stmt (stmt);
281 if (dump_enabled_p ())
283 dump_printf_loc (MSG_NOTE, vect_location,
284 "==> examining statement: ");
285 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
286 dump_printf (MSG_NOTE, "\n");
289 gcc_assert (stmt_info);
291 /* Skip stmts which do not need to be vectorized. */
292 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
293 && !STMT_VINFO_LIVE_P (stmt_info))
294 || gimple_clobber_p (stmt))
296 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
297 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
298 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
299 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
301 stmt = pattern_stmt;
302 stmt_info = vinfo_for_stmt (pattern_stmt);
303 if (dump_enabled_p ())
305 dump_printf_loc (MSG_NOTE, vect_location,
306 "==> examining pattern statement: ");
307 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
308 dump_printf (MSG_NOTE, "\n");
311 else
313 if (dump_enabled_p ())
314 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
315 gsi_next (&si);
316 continue;
319 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
320 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
321 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
322 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
323 analyze_pattern_stmt = true;
325 /* If a pattern statement has def stmts, analyze them too. */
326 if (is_pattern_stmt_p (stmt_info))
328 if (pattern_def_seq == NULL)
330 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
331 pattern_def_si = gsi_start (pattern_def_seq);
333 else if (!gsi_end_p (pattern_def_si))
334 gsi_next (&pattern_def_si);
335 if (pattern_def_seq != NULL)
337 gimple *pattern_def_stmt = NULL;
338 stmt_vec_info pattern_def_stmt_info = NULL;
340 while (!gsi_end_p (pattern_def_si))
342 pattern_def_stmt = gsi_stmt (pattern_def_si);
343 pattern_def_stmt_info
344 = vinfo_for_stmt (pattern_def_stmt);
345 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
346 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
347 break;
348 gsi_next (&pattern_def_si);
351 if (!gsi_end_p (pattern_def_si))
353 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "==> examining pattern def stmt: ");
357 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
358 pattern_def_stmt, 0);
359 dump_printf (MSG_NOTE, "\n");
362 stmt = pattern_def_stmt;
363 stmt_info = pattern_def_stmt_info;
365 else
367 pattern_def_si = gsi_none ();
368 analyze_pattern_stmt = false;
371 else
372 analyze_pattern_stmt = false;
375 if (gimple_get_lhs (stmt) == NULL_TREE
376 /* MASK_STORE has no lhs, but is ok. */
377 && (!is_gimple_call (stmt)
378 || !gimple_call_internal_p (stmt)
379 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
381 if (is_gimple_call (stmt))
383 /* Ignore calls with no lhs. These must be calls to
384 #pragma omp simd functions, and what vectorization factor
385 they really need can't be determined until
386 vectorizable_simd_clone_call. */
387 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
389 pattern_def_seq = NULL;
390 gsi_next (&si);
392 continue;
394 if (dump_enabled_p ())
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
397 "not vectorized: irregular stmt.");
398 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
400 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
402 return false;
405 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
407 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
410 "not vectorized: vector stmt in loop:");
411 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
412 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
414 return false;
417 if (STMT_VINFO_VECTYPE (stmt_info))
419 /* The only case when a vectype has already been set is for stmts
420 that contain a dataref, or for "pattern-stmts" (stmts
421 generated by the vectorizer to represent/replace a certain
422 idiom). */
423 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
424 || is_pattern_stmt_p (stmt_info)
425 || !gsi_end_p (pattern_def_si));
426 vectype = STMT_VINFO_VECTYPE (stmt_info);
428 else
430 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt)
433 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
434 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
435 else
436 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
437 if (dump_enabled_p ())
439 dump_printf_loc (MSG_NOTE, vect_location,
440 "get vectype for scalar type: ");
441 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
442 dump_printf (MSG_NOTE, "\n");
444 vectype = get_vectype_for_scalar_type (scalar_type);
445 if (!vectype)
447 if (dump_enabled_p ())
449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
450 "not vectorized: unsupported "
451 "data-type ");
452 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
453 scalar_type);
454 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
456 return false;
459 STMT_VINFO_VECTYPE (stmt_info) = vectype;
461 if (dump_enabled_p ())
463 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
464 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
465 dump_printf (MSG_NOTE, "\n");
469 /* The vectorization factor is determined according to the smallest
470 scalar type (or the largest vector size, but we only
471 support one vector size per loop). */
472 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
473 &dummy);
474 if (dump_enabled_p ())
476 dump_printf_loc (MSG_NOTE, vect_location,
477 "get vectype for scalar type: ");
478 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
479 dump_printf (MSG_NOTE, "\n");
481 vf_vectype = get_vectype_for_scalar_type (scalar_type);
482 if (!vf_vectype)
484 if (dump_enabled_p ())
486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
487 "not vectorized: unsupported data-type ");
488 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
489 scalar_type);
490 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
492 return false;
495 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
496 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
498 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
501 "not vectorized: different sized vector "
502 "types in statement, ");
503 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
504 vectype);
505 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
506 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
507 vf_vectype);
508 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
510 return false;
513 if (dump_enabled_p ())
515 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
516 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
517 dump_printf (MSG_NOTE, "\n");
520 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
523 if (!vectorization_factor
524 || (nunits > vectorization_factor))
525 vectorization_factor = nunits;
527 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
529 pattern_def_seq = NULL;
530 gsi_next (&si);
535 /* TODO: Analyze cost. Decide if worthwhile to vectorize. */
536 if (dump_enabled_p ())
537 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
538 vectorization_factor);
539 if (vectorization_factor <= 1)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
543 "not vectorized: unsupported data-type\n");
544 return false;
546 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
548 return true;
552 /* Function vect_is_simple_iv_evolution.
554 FORNOW: A simple evolution of an induction variable in the loop is
555 considered a polynomial evolution. */
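/* Illustrative example: for the IV i of "for (i = 0; i < n; i++)" the scev
   analyzer returns the chrec {0, +, 1}_loop, so INIT is 0 and STEP is 1.
   A step that is itself a chrec (an evolution of degree >= 2) is rejected
   below as not "simple".  */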
557 static bool
558 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
559 tree * step)
561 tree init_expr;
562 tree step_expr;
563 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
564 basic_block bb;
566 /* When there is no evolution in this loop, the evolution function
567 is not "simple". */
568 if (evolution_part == NULL_TREE)
569 return false;
571 /* When the evolution is a polynomial of degree >= 2
572 the evolution function is not "simple". */
573 if (tree_is_chrec (evolution_part))
574 return false;
576 step_expr = evolution_part;
577 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
579 if (dump_enabled_p ())
581 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
582 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
583 dump_printf (MSG_NOTE, ", init: ");
584 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
585 dump_printf (MSG_NOTE, "\n");
588 *init = init_expr;
589 *step = step_expr;
591 if (TREE_CODE (step_expr) != INTEGER_CST
592 && (TREE_CODE (step_expr) != SSA_NAME
593 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
594 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
595 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
596 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
597 || !flag_associative_math)))
598 && (TREE_CODE (step_expr) != REAL_CST
599 || !flag_associative_math))
601 if (dump_enabled_p ())
602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
603 "step unknown.\n");
604 return false;
607 return true;
610 /* Function vect_analyze_scalar_cycles_1.
612 Examine the cross iteration def-use cycles of scalar variables
613 in LOOP. LOOP_VINFO represents the loop that is now being
614 considered for vectorization (can be LOOP, or an outer-loop
615 enclosing LOOP). */
617 static void
618 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
620 basic_block bb = loop->header;
621 tree init, step;
622 auto_vec<gimple *, 64> worklist;
623 gphi_iterator gsi;
624 bool double_reduc;
626 if (dump_enabled_p ())
627 dump_printf_loc (MSG_NOTE, vect_location,
628 "=== vect_analyze_scalar_cycles ===\n");
630 /* First - identify all inductions. Reduction detection assumes that all the
631 inductions have been identified, therefore, this order must not be
632 changed. */
633 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
635 gphi *phi = gsi.phi ();
636 tree access_fn = NULL;
637 tree def = PHI_RESULT (phi);
638 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
643 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
644 dump_printf (MSG_NOTE, "\n");
647 /* Skip virtual phi's. The data dependences that are associated with
648 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
649 if (virtual_operand_p (def))
650 continue;
652 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
654 /* Analyze the evolution function. */
655 access_fn = analyze_scalar_evolution (loop, def);
656 if (access_fn)
658 STRIP_NOPS (access_fn);
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_NOTE, vect_location,
662 "Access function of PHI: ");
663 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
664 dump_printf (MSG_NOTE, "\n");
666 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
667 = evolution_part_in_loop_num (access_fn, loop->num);
670 if (!access_fn
671 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
672 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
673 && TREE_CODE (step) != INTEGER_CST))
675 worklist.safe_push (phi);
676 continue;
679 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
683 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
687 /* Second - identify all reductions and nested cycles. */
688 while (worklist.length () > 0)
690 gimple *phi = worklist.pop ();
691 tree def = PHI_RESULT (phi);
692 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
693 gimple *reduc_stmt;
694 bool nested_cycle;
696 if (dump_enabled_p ())
698 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
700 dump_printf (MSG_NOTE, "\n");
703 gcc_assert (!virtual_operand_p (def)
704 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
706 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
707 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
708 &double_reduc, false);
709 if (reduc_stmt)
711 if (double_reduc)
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_NOTE, vect_location,
715 "Detected double reduction.\n");
717 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
718 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
719 vect_double_reduction_def;
721 else
723 if (nested_cycle)
725 if (dump_enabled_p ())
726 dump_printf_loc (MSG_NOTE, vect_location,
727 "Detected vectorizable nested cycle.\n");
729 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
730 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
731 vect_nested_cycle;
733 else
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location,
737 "Detected reduction.\n");
739 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
740 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
741 vect_reduction_def;
742 /* Store the reduction cycles for possible vectorization in
743 loop-aware SLP. */
744 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
748 else
749 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "Unknown def-use cycle pattern.\n");
756 /* Function vect_analyze_scalar_cycles.
758 Examine the cross iteration def-use cycles of scalar variables, by
759 analyzing the loop-header PHIs of scalar variables. Classify each
760 cycle as one of the following: invariant, induction, reduction, unknown.
761 We do that for the loop represented by LOOP_VINFO, and also for its
762 inner-loop, if it exists.
763 Examples for scalar cycles:
765 Example1: reduction:
767 loop1:
768 for (i=0; i<N; i++)
769 sum += a[i];
771 Example2: induction:
773 loop2:
774 for (i=0; i<N; i++)
775 a[i] = i; */
777 static void
778 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
780 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
782 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
784 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
785 Reductions in such inner-loop therefore have different properties than
786 the reductions in the nest that gets vectorized:
787 1. When vectorized, they are executed in the same order as in the original
788 scalar loop, so we can't change the order of computation when
789 vectorizing them.
790 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
791 current checks are too strict. */
793 if (loop->inner)
794 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
797 /* Transfer group and reduction information from STMT to its pattern stmt. */
799 static void
800 vect_fixup_reduc_chain (gimple *stmt)
802 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
803 gimple *stmtp;
804 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
805 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
806 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
809 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
810 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
811 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
812 if (stmt)
813 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
814 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
816 while (stmt);
817 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
820 /* Fixup scalar cycles that now have their stmts detected as patterns. */
822 static void
823 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
825 gimple *first;
826 unsigned i;
828 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
829 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
831 vect_fixup_reduc_chain (first);
832 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
833 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
837 /* Function vect_get_loop_niters.
839 Determine how many iterations the loop executes and place the count
840 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
841 in NUMBER_OF_ITERATIONSM1.
843 Return the loop exit condition. */
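/* Illustrative example (assuming the do-while form the vectorizer requires):
   for a loop whose body executes N > 0 times, the latch runs N - 1 times, so
   NUMBER_OF_ITERATIONSM1 is N - 1 and NUMBER_OF_ITERATIONS is N.  */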
846 static gcond *
847 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
848 tree *number_of_iterationsm1)
850 tree niters;
852 if (dump_enabled_p ())
853 dump_printf_loc (MSG_NOTE, vect_location,
854 "=== get_loop_niters ===\n");
856 niters = number_of_latch_executions (loop);
857 *number_of_iterationsm1 = niters;
859 /* We want the number of loop header executions which is the number
860 of latch executions plus one.
861 ??? For UINT_MAX latch executions this number overflows to zero
862 for loops like do { n++; } while (n != 0); */
863 if (niters && !chrec_contains_undetermined (niters))
864 niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
865 build_int_cst (TREE_TYPE (niters), 1));
866 *number_of_iterations = niters;
868 return get_loop_exit_condition (loop);
872 /* Function bb_in_loop_p
874 Used as predicate for dfs order traversal of the loop bbs. */
876 static bool
877 bb_in_loop_p (const_basic_block bb, const void *data)
879 const struct loop *const loop = (const struct loop *)data;
880 if (flow_bb_inside_loop_p (loop, bb))
881 return true;
882 return false;
886 /* Function new_loop_vec_info.
888 Create and initialize a new loop_vec_info struct for LOOP, as well as
889 stmt_vec_info structs for all the stmts in LOOP. */
891 static loop_vec_info
892 new_loop_vec_info (struct loop *loop)
894 loop_vec_info res;
895 basic_block *bbs;
896 gimple_stmt_iterator si;
897 unsigned int i, nbbs;
899 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
900 LOOP_VINFO_LOOP (res) = loop;
902 bbs = get_loop_body (loop);
904 /* Create/Update stmt_info for all stmts in the loop. */
905 for (i = 0; i < loop->num_nodes; i++)
907 basic_block bb = bbs[i];
909 /* BBs in a nested inner-loop will already have been processed (because
910 we will have called vect_analyze_loop_form for any nested inner-loop).
911 Therefore, for stmts in an inner-loop we just want to update the
912 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
913 loop_info of the outer-loop we are currently considering to vectorize
914 (instead of the loop_info of the inner-loop).
915 For stmts in other BBs we need to create a stmt_info from scratch. */
916 if (bb->loop_father != loop)
918 /* Inner-loop bb. */
919 gcc_assert (loop->inner && bb->loop_father == loop->inner);
920 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
922 gimple *phi = gsi_stmt (si);
923 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
924 loop_vec_info inner_loop_vinfo =
925 STMT_VINFO_LOOP_VINFO (stmt_info);
926 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
927 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
929 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
931 gimple *stmt = gsi_stmt (si);
932 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
933 loop_vec_info inner_loop_vinfo =
934 STMT_VINFO_LOOP_VINFO (stmt_info);
935 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
936 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
939 else
941 /* bb in current nest. */
942 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
944 gimple *phi = gsi_stmt (si);
945 gimple_set_uid (phi, 0);
946 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
949 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
951 gimple *stmt = gsi_stmt (si);
952 gimple_set_uid (stmt, 0);
953 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
958 /* CHECKME: We want to visit all BBs before their successors (except for
959 latch blocks, for which this assertion wouldn't hold). In the simple
960 case of the loop forms we allow, a dfs order of the BBs would be the same
961 as reversed postorder traversal, so we are safe. */
963 free (bbs);
964 bbs = XCNEWVEC (basic_block, loop->num_nodes);
965 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
966 bbs, loop->num_nodes, loop);
967 gcc_assert (nbbs == loop->num_nodes);
969 LOOP_VINFO_BBS (res) = bbs;
970 LOOP_VINFO_NITERSM1 (res) = NULL;
971 LOOP_VINFO_NITERS (res) = NULL;
972 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
973 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
974 LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
975 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
976 LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
977 LOOP_VINFO_VECT_FACTOR (res) = 0;
978 LOOP_VINFO_LOOP_NEST (res).create (3);
979 LOOP_VINFO_DATAREFS (res).create (10);
980 LOOP_VINFO_DDRS (res).create (10 * 10);
981 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
982 LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
983 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
984 LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
985 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
986 LOOP_VINFO_GROUPED_STORES (res).create (10);
987 LOOP_VINFO_REDUCTIONS (res).create (10);
988 LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
989 LOOP_VINFO_SLP_INSTANCES (res).create (10);
990 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
991 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
992 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
993 LOOP_VINFO_PEELING_FOR_NITER (res) = false;
994 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
996 return res;
1000 /* Function destroy_loop_vec_info.
1002 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1003 stmts in the loop. */
1005 void
1006 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1008 struct loop *loop;
1009 basic_block *bbs;
1010 int nbbs;
1011 gimple_stmt_iterator si;
1012 int j;
1013 vec<slp_instance> slp_instances;
1014 slp_instance instance;
1015 bool swapped;
1017 if (!loop_vinfo)
1018 return;
1020 loop = LOOP_VINFO_LOOP (loop_vinfo);
1022 bbs = LOOP_VINFO_BBS (loop_vinfo);
1023 nbbs = clean_stmts ? loop->num_nodes : 0;
1024 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1026 for (j = 0; j < nbbs; j++)
1028 basic_block bb = bbs[j];
1029 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1030 free_stmt_vec_info (gsi_stmt (si));
1032 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1034 gimple *stmt = gsi_stmt (si);
1036 /* We may have broken canonical form by moving a constant
1037 into RHS1 of a commutative op. Fix such occurrences. */
1038 if (swapped && is_gimple_assign (stmt))
1040 enum tree_code code = gimple_assign_rhs_code (stmt);
1042 if ((code == PLUS_EXPR
1043 || code == POINTER_PLUS_EXPR
1044 || code == MULT_EXPR)
1045 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1046 swap_ssa_operands (stmt,
1047 gimple_assign_rhs1_ptr (stmt),
1048 gimple_assign_rhs2_ptr (stmt));
1051 /* Free stmt_vec_info. */
1052 free_stmt_vec_info (stmt);
1053 gsi_next (&si);
1057 free (LOOP_VINFO_BBS (loop_vinfo));
1058 vect_destroy_datarefs (loop_vinfo, NULL);
1059 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1060 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1061 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1062 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1063 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1064 FOR_EACH_VEC_ELT (slp_instances, j, instance)
1065 vect_free_slp_instance (instance);
1067 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1068 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1069 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1070 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1072 delete LOOP_VINFO_PEELING_HTAB (loop_vinfo);
1073 LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL;
1075 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1076 loop_vinfo->scalar_cost_vec.release ();
1078 free (loop_vinfo);
1079 loop->aux = NULL;
1083 /* Calculate the cost of one scalar iteration of the loop. */
1084 static void
1085 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1087 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1088 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1089 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1090 int innerloop_iters, i;
1092 /* Count statements in the scalar loop. We use this as the scalar cost of a
1093 single iteration for now.
1095 TODO: Add outer loop support.
1097 TODO: Consider assigning different costs to different scalar
1098 statements. */
1100 /* FORNOW. */
1101 innerloop_iters = 1;
1102 if (loop->inner)
1103 innerloop_iters = 50; /* FIXME */
1105 for (i = 0; i < nbbs; i++)
1107 gimple_stmt_iterator si;
1108 basic_block bb = bbs[i];
1110 if (bb->loop_father == loop->inner)
1111 factor = innerloop_iters;
1112 else
1113 factor = 1;
1115 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1117 gimple *stmt = gsi_stmt (si);
1118 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1121 continue;
1123 /* Skip stmts that are not vectorized inside the loop. */
1124 if (stmt_info
1125 && !STMT_VINFO_RELEVANT_P (stmt_info)
1126 && (!STMT_VINFO_LIVE_P (stmt_info)
1127 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1128 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1129 continue;
1131 vect_cost_for_stmt kind;
1132 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1134 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1135 kind = scalar_load;
1136 else
1137 kind = scalar_store;
1139 else
1140 kind = scalar_stmt;
1142 scalar_single_iter_cost
1143 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1144 factor, kind, NULL, 0, vect_prologue);
1147 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1148 = scalar_single_iter_cost;
1152 /* Function vect_analyze_loop_1.
1154 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1155 for it. The different analyses will record information in the
1156 loop_vec_info struct. This is a subset of the analyses applied in
1157 vect_analyze_loop, to be applied on an inner-loop nested in the loop
1158 that is now considered for (outer-loop) vectorization. */
1160 static loop_vec_info
1161 vect_analyze_loop_1 (struct loop *loop)
1163 loop_vec_info loop_vinfo;
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "===== analyze_loop_nest_1 =====\n");
1169 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
1171 loop_vinfo = vect_analyze_loop_form (loop);
1172 if (!loop_vinfo)
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1176 "bad inner-loop form.\n");
1177 return NULL;
1180 return loop_vinfo;
1184 /* Function vect_analyze_loop_form.
1186 Verify that certain CFG restrictions hold, including:
1187 - the loop has a pre-header
1188 - the loop has a single entry and exit
1189 - the loop exit condition is simple enough, and the number of iterations
1190 can be analyzed (a countable loop). */
1192 loop_vec_info
1193 vect_analyze_loop_form (struct loop *loop)
1195 loop_vec_info loop_vinfo;
1196 gcond *loop_cond;
1197 tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
1198 loop_vec_info inner_loop_vinfo = NULL;
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "=== vect_analyze_loop_form ===\n");
1204 /* Different restrictions apply when we are considering an inner-most loop,
1205 vs. an outer (nested) loop.
1206 (FORNOW. May want to relax some of these restrictions in the future). */
1208 if (!loop->inner)
1210 /* Inner-most loop. We currently require that the number of BBs is
1211 exactly 2 (the header and latch). Vectorizable inner-most loops
1212 look like this:
1214                     (pre-header)
1215                        |
1216                      header <--------+
1217                       | |            |
1218                       | +--> latch --+
1219                       |
1220                     (exit-bb)   */
1222 if (loop->num_nodes != 2)
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1226 "not vectorized: control flow in loop.\n");
1227 return NULL;
1230 if (empty_block_p (loop->header))
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1234 "not vectorized: empty loop.\n");
1235 return NULL;
1238 else
1240 struct loop *innerloop = loop->inner;
1241 edge entryedge;
1243 /* Nested loop. We currently require that the loop is doubly-nested,
1244 contains a single inner loop, and the number of BBs is exactly 5.
1245 Vectorizable outer-loops look like this:
1247                     (pre-header)
1248                        |
1249                      header <---+
1250                       |         |
1251                     inner-loop  |
1252                       |         |
1253                      tail ------+
1254                       |
1255                     (exit-bb)
1257 The inner-loop has the properties expected of inner-most loops
1258 as described above. */
1260 if ((loop->inner)->inner || (loop->inner)->next)
1262 if (dump_enabled_p ())
1263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1264 "not vectorized: multiple nested loops.\n");
1265 return NULL;
1268 /* Analyze the inner-loop. */
1269 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1270 if (!inner_loop_vinfo)
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1274 "not vectorized: Bad inner loop.\n");
1275 return NULL;
1278 if (!expr_invariant_in_loop_p (loop,
1279 LOOP_VINFO_NITERS (inner_loop_vinfo)))
1281 if (dump_enabled_p ())
1282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1283 "not vectorized: inner-loop count not"
1284 " invariant.\n");
1285 destroy_loop_vec_info (inner_loop_vinfo, true);
1286 return NULL;
1289 if (loop->num_nodes != 5)
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1293 "not vectorized: control flow in loop.\n");
1294 destroy_loop_vec_info (inner_loop_vinfo, true);
1295 return NULL;
1298 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1299 entryedge = EDGE_PRED (innerloop->header, 0);
1300 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1301 entryedge = EDGE_PRED (innerloop->header, 1);
1303 if (entryedge->src != loop->header
1304 || !single_exit (innerloop)
1305 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1309 "not vectorized: unsupported outerloop form.\n");
1310 destroy_loop_vec_info (inner_loop_vinfo, true);
1311 return NULL;
1314 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_NOTE, vect_location,
1316 "Considering outer-loop vectorization.\n");
1319 if (!single_exit (loop)
1320 || EDGE_COUNT (loop->header->preds) != 2)
1322 if (dump_enabled_p ())
1324 if (!single_exit (loop))
1325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1326 "not vectorized: multiple exits.\n");
1327 else if (EDGE_COUNT (loop->header->preds) != 2)
1328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1329 "not vectorized: too many incoming edges.\n");
1331 if (inner_loop_vinfo)
1332 destroy_loop_vec_info (inner_loop_vinfo, true);
1333 return NULL;
1336 /* We assume that the loop exit condition is at the end of the loop, i.e.,
1337 that the loop is represented as a do-while (with a proper if-guard
1338 before the loop if needed), where the loop header contains all the
1339 executable statements, and the latch is empty. */
1340 if (!empty_block_p (loop->latch)
1341 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1343 if (dump_enabled_p ())
1344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1345 "not vectorized: latch block not empty.\n");
1346 if (inner_loop_vinfo)
1347 destroy_loop_vec_info (inner_loop_vinfo, true);
1348 return NULL;
1351 /* Make sure there exists a single-predecessor exit bb: */
1352 if (!single_pred_p (single_exit (loop)->dest))
1354 edge e = single_exit (loop);
1355 if (!(e->flags & EDGE_ABNORMAL))
1357 split_loop_exit_edge (e);
1358 if (dump_enabled_p ())
1359 dump_printf (MSG_NOTE, "split exit edge.\n");
1361 else
1363 if (dump_enabled_p ())
1364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1365 "not vectorized: abnormal loop exit edge.\n");
1366 if (inner_loop_vinfo)
1367 destroy_loop_vec_info (inner_loop_vinfo, true);
1368 return NULL;
1372 loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
1373 &number_of_iterationsm1);
1374 if (!loop_cond)
1376 if (dump_enabled_p ())
1377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1378 "not vectorized: complicated exit condition.\n");
1379 if (inner_loop_vinfo)
1380 destroy_loop_vec_info (inner_loop_vinfo, true);
1381 return NULL;
1384 if (!number_of_iterations
1385 || chrec_contains_undetermined (number_of_iterations))
1387 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1389 "not vectorized: number of iterations cannot be "
1390 "computed.\n");
1391 if (inner_loop_vinfo)
1392 destroy_loop_vec_info (inner_loop_vinfo, true);
1393 return NULL;
1396 if (integer_zerop (number_of_iterations))
1398 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1400 "not vectorized: number of iterations = 0.\n");
1401 if (inner_loop_vinfo)
1402 destroy_loop_vec_info (inner_loop_vinfo, true);
1403 return NULL;
1406 loop_vinfo = new_loop_vec_info (loop);
1407 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1408 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1409 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1411 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1413 if (dump_enabled_p ())
1415 dump_printf_loc (MSG_NOTE, vect_location,
1416 "Symbolic number of iterations is ");
1417 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1418 dump_printf (MSG_NOTE, "\n");
1422 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1424 /* CHECKME: May want to keep it around in the future. */
1425 if (inner_loop_vinfo)
1426 destroy_loop_vec_info (inner_loop_vinfo, false);
1428 gcc_assert (!loop->aux);
1429 loop->aux = loop_vinfo;
1430 return loop_vinfo;
1433 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
1434 statements, update the vectorization factor. */
1436 static void
1437 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1439 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1440 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1441 int nbbs = loop->num_nodes;
1442 unsigned int vectorization_factor;
1443 int i;
1445 if (dump_enabled_p ())
1446 dump_printf_loc (MSG_NOTE, vect_location,
1447 "=== vect_update_vf_for_slp ===\n");
1449 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1450 gcc_assert (vectorization_factor != 0);
1452 /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
1453 vectorization factor of the loop is the unrolling factor required by
1454 the SLP instances. If that unrolling factor is 1, we say that we
1455 perform pure SLP on the loop - cross-iteration parallelism is not
1456 exploited. */
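  /* Worked example (illustrative numbers): if the vectorization factor
     computed so far is 4 and the SLP unrolling factor is 2, a loop with
     hybrid stmts keeps VF = lcm (4, 2) = 4, while a pure-SLP loop gets
     VF = 2.  */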
1457 bool only_slp_in_loop = true;
1458 for (i = 0; i < nbbs; i++)
1460 basic_block bb = bbs[i];
1461 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1462 gsi_next (&si))
1464 gimple *stmt = gsi_stmt (si);
1465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1466 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1467 && STMT_VINFO_RELATED_STMT (stmt_info))
1469 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1470 stmt_info = vinfo_for_stmt (stmt);
1472 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1473 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1474 && !PURE_SLP_STMT (stmt_info))
1475 /* STMT needs both SLP and loop-based vectorization. */
1476 only_slp_in_loop = false;
1480 if (only_slp_in_loop)
1481 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1482 else
1483 vectorization_factor
1484 = least_common_multiple (vectorization_factor,
1485 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1487 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1488 if (dump_enabled_p ())
1489 dump_printf_loc (MSG_NOTE, vect_location,
1490 "Updating vectorization factor to %d\n",
1491 vectorization_factor);
1494 /* Function vect_analyze_loop_operations.
1496 Scan the loop stmts and make sure they are all vectorizable. */
1498 static bool
1499 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1501 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1502 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1503 int nbbs = loop->num_nodes;
1504 unsigned int vectorization_factor;
1505 int i;
1506 stmt_vec_info stmt_info;
1507 bool need_to_vectorize = false;
1508 int min_profitable_iters;
1509 int min_scalar_loop_bound;
1510 unsigned int th;
1511 bool ok;
1512 HOST_WIDE_INT max_niter;
1513 HOST_WIDE_INT estimated_niter;
1514 int min_profitable_estimate;
1516 if (dump_enabled_p ())
1517 dump_printf_loc (MSG_NOTE, vect_location,
1518 "=== vect_analyze_loop_operations ===\n");
1520 for (i = 0; i < nbbs; i++)
1522 basic_block bb = bbs[i];
1524 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1525 gsi_next (&si))
1527 gphi *phi = si.phi ();
1528 ok = true;
1530 stmt_info = vinfo_for_stmt (phi);
1531 if (dump_enabled_p ())
1533 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1534 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1535 dump_printf (MSG_NOTE, "\n");
1538 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1539 (i.e., a phi in the tail of the outer-loop). */
1540 if (! is_loop_header_bb_p (bb))
1542 /* FORNOW: we currently don't support the case that these phis
1543 are not used in the outer-loop (unless it is a double reduction,
1544 i.e., this phi is vect_reduction_def), because this case
1545 requires us to actually do something here. */
1546 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1547 || STMT_VINFO_LIVE_P (stmt_info))
1548 && STMT_VINFO_DEF_TYPE (stmt_info)
1549 != vect_double_reduction_def)
1551 if (dump_enabled_p ())
1552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1553 "Unsupported loop-closed phi in "
1554 "outer-loop.\n");
1555 return false;
1558 /* If PHI is used in the outer loop, we check that its operand
1559 is defined in the inner loop. */
1560 if (STMT_VINFO_RELEVANT_P (stmt_info))
1562 tree phi_op;
1563 gimple *op_def_stmt;
1565 if (gimple_phi_num_args (phi) != 1)
1566 return false;
1568 phi_op = PHI_ARG_DEF (phi, 0);
1569 if (TREE_CODE (phi_op) != SSA_NAME)
1570 return false;
1572 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1573 if (gimple_nop_p (op_def_stmt)
1574 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1575 || !vinfo_for_stmt (op_def_stmt))
1576 return false;
1578 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1579 != vect_used_in_outer
1580 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1581 != vect_used_in_outer_by_reduction)
1582 return false;
1585 continue;
1588 gcc_assert (stmt_info);
1590 if (STMT_VINFO_LIVE_P (stmt_info))
1592 /* FORNOW: not yet supported. */
1593 if (dump_enabled_p ())
1594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1595 "not vectorized: value used after loop.\n");
1596 return false;
1599 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1600 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1602 /* A scalar-dependence cycle that we don't support. */
1603 if (dump_enabled_p ())
1604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1605 "not vectorized: scalar dependence cycle.\n");
1606 return false;
1609 if (STMT_VINFO_RELEVANT_P (stmt_info))
1611 need_to_vectorize = true;
1612 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1613 ok = vectorizable_induction (phi, NULL, NULL);
1616 if (!ok)
1618 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1621 "not vectorized: relevant phi not "
1622 "supported: ");
1623 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1624 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1626 return false;
1630 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1631 gsi_next (&si))
1633 gimple *stmt = gsi_stmt (si);
1634 if (!gimple_clobber_p (stmt)
1635 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1636 return false;
1638 } /* bbs */
1640 /* All operations in the loop are either irrelevant (deal with loop
1641 control, or dead), or only used outside the loop and can be moved
1642 out of the loop (e.g. invariants, inductions). The loop can be
1643 optimized away by scalar optimizations. We're better off not
1644 touching this loop. */
1645 if (!need_to_vectorize)
1647 if (dump_enabled_p ())
1648 dump_printf_loc (MSG_NOTE, vect_location,
1649 "All the computation can be taken out of the loop.\n");
1650 if (dump_enabled_p ())
1651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1652 "not vectorized: redundant loop. no profit to "
1653 "vectorize.\n");
1654 return false;
1657 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1658 gcc_assert (vectorization_factor != 0);
1660 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1661 dump_printf_loc (MSG_NOTE, vect_location,
1662 "vectorization_factor = %d, niters = "
1663 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1664 LOOP_VINFO_INT_NITERS (loop_vinfo));
1666 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1667 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1668 || ((max_niter = max_stmt_executions_int (loop)) != -1
1669 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1671 if (dump_enabled_p ())
1672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1673 "not vectorized: iteration count too small.\n");
1674 if (dump_enabled_p ())
1675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1676 "not vectorized: iteration count smaller than "
1677 "vectorization factor.\n");
1678 return false;
1681 /* Analyze cost. Decide if worthwhile to vectorize. */
1683 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1684 &min_profitable_estimate);
1685 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1687 if (min_profitable_iters < 0)
1689 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1691 "not vectorized: vectorization not profitable.\n");
1692 if (dump_enabled_p ())
1693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1694 "not vectorized: vector version will never be "
1695 "profitable.\n");
1696 return false;
1699 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1700 * vectorization_factor) - 1);
1703 /* Use the cost model only if it is more conservative than the
1704 user-specified threshold. */
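  /* Illustrative numbers: with --param min-vect-loop-bound=2 and a
     vectorization factor of 4, min_scalar_loop_bound is 2*4 - 1 = 7; if the
     cost model computed min_profitable_iters = 10, the more conservative
     value 10 becomes the threshold.  */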
1706 th = (unsigned) min_scalar_loop_bound;
1707 if (min_profitable_iters
1708 && (!min_scalar_loop_bound
1709 || min_profitable_iters > min_scalar_loop_bound))
1710 th = (unsigned) min_profitable_iters;
1712 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1714 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1715 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1717 if (dump_enabled_p ())
1718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1719 "not vectorized: vectorization not profitable.\n");
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_NOTE, vect_location,
1722 "not vectorized: iteration count smaller than user "
1723 "specified loop bound parameter or minimum profitable "
1724 "iterations (whichever is more conservative).\n");
1725 return false;
1728 if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1729 && ((unsigned HOST_WIDE_INT) estimated_niter
1730 <= MAX (th, (unsigned)min_profitable_estimate)))
1732 if (dump_enabled_p ())
1733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1734 "not vectorized: estimated iteration count too "
1735 "small.\n");
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_NOTE, vect_location,
1738 "not vectorized: estimated iteration count smaller "
1739 "than specified loop bound parameter or minimum "
1740 "profitable iterations (whichever is more "
1741 "conservative).\n");
1742 return false;
1745 return true;
1749 /* Function vect_analyze_loop_2.
1751 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1752 for it. The different analyses will record information in the
1753 loop_vec_info struct. */
1754 static bool
1755 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1757 bool ok;
1758 int max_vf = MAX_VECTORIZATION_FACTOR;
1759 int min_vf = 2;
1760 unsigned int th;
1761 unsigned int n_stmts = 0;
1763 /* Find all data references in the loop (which correspond to vdefs/vuses)
1764 and analyze their evolution in the loop. Also adjust the minimal
1765 vectorization factor according to the loads and stores.
1767 FORNOW: Handle only simple array references whose
1768 alignment can be forced, and aligned pointer-references. */
1770 ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf, &n_stmts);
1771 if (!ok)
1773 if (dump_enabled_p ())
1774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1775 "bad data references.\n");
1776 return false;
1779 /* Classify all cross-iteration scalar data-flow cycles.
1780 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1782 vect_analyze_scalar_cycles (loop_vinfo);
1784 vect_pattern_recog (loop_vinfo, NULL);
1786 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1788 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1789 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1791 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1792 if (!ok)
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 "bad data access.\n");
1797 return false;
1800 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1802 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1803 if (!ok)
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "unexpected pattern.\n");
1808 return false;
1811 /* Analyze data dependences between the data-refs in the loop
1812 and adjust the maximum vectorization factor according to
1813 the dependences.
1814 FORNOW: fail at the first data dependence that we encounter. */
1816 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1817 if (!ok
1818 || max_vf < min_vf)
1820 if (dump_enabled_p ())
1821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1822 "bad data dependence.\n");
1823 return false;
1826 ok = vect_determine_vectorization_factor (loop_vinfo);
1827 if (!ok)
1829 if (dump_enabled_p ())
1830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1831 "can't determine vectorization factor.\n");
1832 return false;
1834 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "bad data dependence.\n");
1839 return false;
1842 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1843 ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
1844 if (!ok)
1845 return false;
1847 /* If there are any SLP instances mark them as pure_slp. */
1848 bool slp = vect_make_slp_decision (loop_vinfo);
1849 if (slp)
1851 /* Find stmts that need to be both vectorized and SLPed. */
1852 vect_detect_hybrid_slp (loop_vinfo);
1854 /* Update the vectorization factor based on the SLP decision. */
1855 vect_update_vf_for_slp (loop_vinfo);
1858 /* Analyze the alignment of the data-refs in the loop.
1859 Fail if a data reference is found that cannot be vectorized. */
1861 ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1862 if (!ok)
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1866 "bad data alignment.\n");
1867 return false;
1870 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1871 It is important to call pruning after vect_analyze_data_ref_accesses,
1872 since we use grouping information gathered by interleaving analysis. */
1873 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1874 if (!ok)
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "number of versioning for alias "
1879 "run-time tests exceeds %d "
1880 "(--param vect-max-version-for-alias-checks)\n",
1881 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1882 return false;
1885 /* Compute the scalar iteration cost. */
1886 vect_get_single_scalar_iteration_cost (loop_vinfo);
1888 /* This pass will decide on using loop versioning and/or loop peeling in
1889 order to enhance the alignment of data references in the loop. */
1891 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1892 if (!ok)
1894 if (dump_enabled_p ())
1895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1896 "bad data alignment.\n");
1897 return false;
1900 if (slp)
1902 /* Analyze operations in the SLP instances. Note this may
1903 remove unsupported SLP instances which makes the above
1904 SLP kind detection invalid. */
1905 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1906 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1907 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1908 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1909 return false;
1912 /* Scan all the remaining operations in the loop that are not subject
1913 to SLP and make sure they are vectorizable. */
1914 ok = vect_analyze_loop_operations (loop_vinfo);
1915 if (!ok)
1917 if (dump_enabled_p ())
1918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1919 "bad operation or unsupported loop bound.\n");
1920 return false;
1923 /* Decide whether we need to create an epilogue loop to handle
1924 remaining scalar iterations. */
1925 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
1926 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1927 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1929 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1930 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1932 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
1933 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1934 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1935 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1937 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1938 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1939 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1940 /* In case of versioning, check if the maximum number of
1941 iterations is greater than th. If they are identical,
1942 the epilogue is unnecessary. */
1943 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1944 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1945 || (unsigned HOST_WIDE_INT)max_stmt_executions_int
1946 (LOOP_VINFO_LOOP (loop_vinfo)) > th)))
1947 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
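/* Illustrative example with made-up numbers: with VF = 8, a known
   iteration count of 100 and 3 iterations peeled for alignment,
   100 - 3 = 97 has ctz (97) = 0 < log2 (8) = 3, so an epilogue loop is
   needed for the leftover 97 % 8 = 1 iteration.  With 67 iterations and
   the same peeling, 67 - 3 = 64 is a multiple of 8 (ctz (64) = 6 >= 3)
   and no epilogue is required.  */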
1949 /* If an epilogue loop is required make sure we can create one. */
1950 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1951 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
1953 if (dump_enabled_p ())
1954 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
1955 if (!vect_can_advance_ivs_p (loop_vinfo)
1956 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
1957 single_exit (LOOP_VINFO_LOOP
1958 (loop_vinfo))))
1960 if (dump_enabled_p ())
1961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1962 "not vectorized: can't create required "
1963 "epilog loop\n");
1964 return false;
1968 return true;
1971 /* Function vect_analyze_loop.
1973 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1974 for it. The different analyses will record information in the
1975 loop_vec_info struct. */
1976 loop_vec_info
1977 vect_analyze_loop (struct loop *loop)
1979 loop_vec_info loop_vinfo;
1980 unsigned int vector_sizes;
1982 /* Autodetect first vector size we try. */
1983 current_vector_size = 0;
1984 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1986 if (dump_enabled_p ())
1987 dump_printf_loc (MSG_NOTE, vect_location,
1988 "===== analyze_loop_nest =====\n");
1990 if (loop_outer (loop)
1991 && loop_vec_info_for_loop (loop_outer (loop))
1992 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1994 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_NOTE, vect_location,
1996 "outer-loop already vectorized.\n");
1997 return NULL;
2000 while (1)
2002 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2003 loop_vinfo = vect_analyze_loop_form (loop);
2004 if (!loop_vinfo)
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2008 "bad loop form.\n");
2009 return NULL;
2012 if (vect_analyze_loop_2 (loop_vinfo))
2014 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2016 return loop_vinfo;
2019 destroy_loop_vec_info (loop_vinfo, true);
2021 vector_sizes &= ~current_vector_size;
2022 if (vector_sizes == 0
2023 || current_vector_size == 0)
2024 return NULL;
2026 /* Try the largest remaining vector size. */
2027 current_vector_size = 1 << floor_log2 (vector_sizes);
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "***** Re-trying analysis with "
2031 "vector size %d\n", current_vector_size);
2036 /* Function reduction_code_for_scalar_code
2038 Input:
2039 CODE - tree_code of a reduction operations.
2041 Output:
2042 REDUC_CODE - the corresponding tree-code to be used to reduce the
2043 vector of partial results into a single scalar result, or ERROR_MARK
2044 if the operation is a supported reduction operation, but does not have
2045 such a tree-code.
2047 Return FALSE if CODE currently cannot be vectorized as reduction. */
2049 static bool
2050 reduction_code_for_scalar_code (enum tree_code code,
2051 enum tree_code *reduc_code)
2053 switch (code)
2055 case MAX_EXPR:
2056 *reduc_code = REDUC_MAX_EXPR;
2057 return true;
2059 case MIN_EXPR:
2060 *reduc_code = REDUC_MIN_EXPR;
2061 return true;
2063 case PLUS_EXPR:
2064 *reduc_code = REDUC_PLUS_EXPR;
2065 return true;
2067 case MULT_EXPR:
2068 case MINUS_EXPR:
2069 case BIT_IOR_EXPR:
2070 case BIT_XOR_EXPR:
2071 case BIT_AND_EXPR:
2072 *reduc_code = ERROR_MARK;
2073 return true;
2075 default:
2076 return false;
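/* As an illustration: for a loop computing  sum += a[i]  the scalar code
   is PLUS_EXPR and the epilogue can collapse the vector accumulator with
   a single REDUC_PLUS_EXPR; for MULT_EXPR the function still returns true
   but with ERROR_MARK, so the epilogue instead extracts the elements and
   combines them with scalar operations (see the costing in
   vect_model_reduction_cost below).  */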
2081 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2082 STMT is printed with a message MSG. */
2084 static void
2085 report_vect_op (int msg_type, gimple *stmt, const char *msg)
2087 dump_printf_loc (msg_type, vect_location, "%s", msg);
2088 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2089 dump_printf (msg_type, "\n");
2093 /* Detect SLP reduction of the form:
2095 #a1 = phi <a5, a0>
2096 a2 = operation (a1)
2097 a3 = operation (a2)
2098 a4 = operation (a3)
2099 a5 = operation (a4)
2101 #a = phi <a5>
2103 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2104 FIRST_STMT is the first reduction stmt in the chain
2105 (a2 = operation (a1)).
2107 Return TRUE if a reduction chain was detected. */
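/* Illustration (hypothetical source code): a chain like the one above
   typically comes from a loop such as

     int s = 0;
     for (i = 0; i < n; i++)
       s = s + a[4*i] + a[4*i+1] + a[4*i+2] + a[4*i+3];

   where each '+' becomes one of the statements a2..a5 and the phi node
   #a1 carries s around the loop.  */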
2109 static bool
2110 vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
2111 gimple *first_stmt)
2113 struct loop *loop = (gimple_bb (phi))->loop_father;
2114 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2115 enum tree_code code;
2116 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt;
2117 stmt_vec_info use_stmt_info, current_stmt_info;
2118 tree lhs;
2119 imm_use_iterator imm_iter;
2120 use_operand_p use_p;
2121 int nloop_uses, size = 0, n_out_of_loop_uses;
2122 bool found = false;
2124 if (loop != vect_loop)
2125 return false;
2127 lhs = PHI_RESULT (phi);
2128 code = gimple_assign_rhs_code (first_stmt);
2129 while (1)
2131 nloop_uses = 0;
2132 n_out_of_loop_uses = 0;
2133 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2135 gimple *use_stmt = USE_STMT (use_p);
2136 if (is_gimple_debug (use_stmt))
2137 continue;
2139 /* Check if we got back to the reduction phi. */
2140 if (use_stmt == phi)
2142 loop_use_stmt = use_stmt;
2143 found = true;
2144 break;
2147 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2149 loop_use_stmt = use_stmt;
2150 nloop_uses++;
2152 else
2153 n_out_of_loop_uses++;
2155 /* There can be either a single use in the loop or two uses in
2156 phi nodes. */
2157 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2158 return false;
2161 if (found)
2162 break;
2164 /* We reached a statement with no loop uses. */
2165 if (nloop_uses == 0)
2166 return false;
2168 /* This is a loop exit phi, and we haven't reached the reduction phi. */
2169 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2170 return false;
2172 if (!is_gimple_assign (loop_use_stmt)
2173 || code != gimple_assign_rhs_code (loop_use_stmt)
2174 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2175 return false;
2177 /* Insert USE_STMT into reduction chain. */
2178 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2179 if (current_stmt)
2181 current_stmt_info = vinfo_for_stmt (current_stmt);
2182 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2183 GROUP_FIRST_ELEMENT (use_stmt_info)
2184 = GROUP_FIRST_ELEMENT (current_stmt_info);
2186 else
2187 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2189 lhs = gimple_assign_lhs (loop_use_stmt);
2190 current_stmt = loop_use_stmt;
2191 size++;
2194 if (!found || loop_use_stmt != phi || size < 2)
2195 return false;
2197 /* Swap the operands, if needed, to make the reduction operand be the second
2198 operand. */
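/* For example (illustrative): a chain statement written as
   a3 = a2 + b[i], where a2 is the value flowing around the cycle, is
   rewritten as  a3 = b[i] + a2  so that the reduction operand is the
   second (last) one.  */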
2199 lhs = PHI_RESULT (phi);
2200 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2201 while (next_stmt)
2203 if (gimple_assign_rhs2 (next_stmt) == lhs)
2205 tree op = gimple_assign_rhs1 (next_stmt);
2206 gimple *def_stmt = NULL;
2208 if (TREE_CODE (op) == SSA_NAME)
2209 def_stmt = SSA_NAME_DEF_STMT (op);
2211 /* Check that the other def is either defined in the loop
2212 ("vect_internal_def"), or it's an induction (defined by a
2213 loop-header phi-node). */
2214 if (def_stmt
2215 && gimple_bb (def_stmt)
2216 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2217 && (is_gimple_assign (def_stmt)
2218 || is_gimple_call (def_stmt)
2219 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2220 == vect_induction_def
2221 || (gimple_code (def_stmt) == GIMPLE_PHI
2222 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2223 == vect_internal_def
2224 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2226 lhs = gimple_assign_lhs (next_stmt);
2227 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2228 continue;
2231 return false;
2233 else
2235 tree op = gimple_assign_rhs2 (next_stmt);
2236 gimple *def_stmt = NULL;
2238 if (TREE_CODE (op) == SSA_NAME)
2239 def_stmt = SSA_NAME_DEF_STMT (op);
2241 /* Check that the other def is either defined in the loop
2242 ("vect_internal_def"), or it's an induction (defined by a
2243 loop-header phi-node). */
2244 if (def_stmt
2245 && gimple_bb (def_stmt)
2246 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2247 && (is_gimple_assign (def_stmt)
2248 || is_gimple_call (def_stmt)
2249 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2250 == vect_induction_def
2251 || (gimple_code (def_stmt) == GIMPLE_PHI
2252 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2253 == vect_internal_def
2254 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2256 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2259 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2260 dump_printf (MSG_NOTE, "\n");
2263 swap_ssa_operands (next_stmt,
2264 gimple_assign_rhs1_ptr (next_stmt),
2265 gimple_assign_rhs2_ptr (next_stmt));
2266 update_stmt (next_stmt);
2268 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2269 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2271 else
2272 return false;
2275 lhs = gimple_assign_lhs (next_stmt);
2276 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2279 /* Save the chain for further analysis in SLP detection. */
2280 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2281 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2282 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2284 return true;
2288 /* Function vect_is_simple_reduction_1
2290 (1) Detect a cross-iteration def-use cycle that represents a simple
2291 reduction computation. We look for the following pattern:
2293 loop_header:
2294 a1 = phi < a0, a2 >
2295 a3 = ...
2296 a2 = operation (a3, a1)
2300 a3 = ...
2301 loop_header:
2302 a1 = phi < a0, a2 >
2303 a2 = operation (a3, a1)
2305 such that:
2306 1. operation is commutative and associative and it is safe to
2307 change the order of the computation (if CHECK_REDUCTION is true)
2308 2. no uses for a2 in the loop (a2 is used out of the loop)
2309 3. no uses of a1 in the loop besides the reduction operation
2310 4. no uses of a1 outside the loop.
2312 Conditions 1,4 are tested here.
2313 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2315 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2316 nested cycles, if CHECK_REDUCTION is false.
2318 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2319 reductions:
2321 a1 = phi < a0, a2 >
2322 inner loop (def of a3)
2323 a2 = phi < a3 >
2325 If MODIFY is true it tries also to rework the code in-place to enable
2326 detection of more reduction patterns. For the time being we rewrite
2327 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
2330 static gimple *
2331 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
2332 bool check_reduction, bool *double_reduc,
2333 bool modify, bool need_wrapping_integral_overflow)
2335 struct loop *loop = (gimple_bb (phi))->loop_father;
2336 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2337 edge latch_e = loop_latch_edge (loop);
2338 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2339 gimple *def_stmt, *def1 = NULL, *def2 = NULL;
2340 enum tree_code orig_code, code;
2341 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2342 tree type;
2343 int nloop_uses;
2344 tree name;
2345 imm_use_iterator imm_iter;
2346 use_operand_p use_p;
2347 bool phi_def;
2349 *double_reduc = false;
2351 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2352 otherwise, we assume outer loop vectorization. */
2353 gcc_assert ((check_reduction && loop == vect_loop)
2354 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2356 name = PHI_RESULT (phi);
2357 /* ??? If there are no uses of the PHI result the inner loop reduction
2358 won't be detected as possibly double-reduction by vectorizable_reduction
2359 because that tries to walk the PHI arg from the preheader edge which
2360 can be constant. See PR60382. */
2361 if (has_zero_uses (name))
2362 return NULL;
2363 nloop_uses = 0;
2364 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2366 gimple *use_stmt = USE_STMT (use_p);
2367 if (is_gimple_debug (use_stmt))
2368 continue;
2370 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2372 if (dump_enabled_p ())
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2374 "intermediate value used outside loop.\n");
2376 return NULL;
2379 nloop_uses++;
2380 if (nloop_uses > 1)
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "reduction used in loop.\n");
2385 return NULL;
2389 if (TREE_CODE (loop_arg) != SSA_NAME)
2391 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "reduction: not ssa_name: ");
2395 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2396 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2398 return NULL;
2401 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2402 if (!def_stmt)
2404 if (dump_enabled_p ())
2405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2406 "reduction: no def_stmt.\n");
2407 return NULL;
2410 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2412 if (dump_enabled_p ())
2414 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2415 dump_printf (MSG_NOTE, "\n");
2417 return NULL;
2420 if (is_gimple_assign (def_stmt))
2422 name = gimple_assign_lhs (def_stmt);
2423 phi_def = false;
2425 else
2427 name = PHI_RESULT (def_stmt);
2428 phi_def = true;
2431 nloop_uses = 0;
2432 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2434 gimple *use_stmt = USE_STMT (use_p);
2435 if (is_gimple_debug (use_stmt))
2436 continue;
2437 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2438 nloop_uses++;
2439 if (nloop_uses > 1)
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "reduction used in loop.\n");
2444 return NULL;
2448 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2449 defined in the inner loop. */
2450 if (phi_def)
2452 op1 = PHI_ARG_DEF (def_stmt, 0);
2454 if (gimple_phi_num_args (def_stmt) != 1
2455 || TREE_CODE (op1) != SSA_NAME)
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "unsupported phi node definition.\n");
2461 return NULL;
2464 def1 = SSA_NAME_DEF_STMT (op1);
2465 if (gimple_bb (def1)
2466 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2467 && loop->inner
2468 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2469 && is_gimple_assign (def1))
2471 if (dump_enabled_p ())
2472 report_vect_op (MSG_NOTE, def_stmt,
2473 "detected double reduction: ");
2475 *double_reduc = true;
2476 return def_stmt;
2479 return NULL;
2482 code = orig_code = gimple_assign_rhs_code (def_stmt);
2484 /* We can handle "res -= x[i]", which is non-associative by
2485 simply rewriting this into "res += -x[i]". Avoid changing
2486 gimple instruction for the first simple tests and only do this
2487 if we're allowed to change code at all. */
2488 if (code == MINUS_EXPR
2489 && modify
2490 && (op1 = gimple_assign_rhs1 (def_stmt))
2491 && TREE_CODE (op1) == SSA_NAME
2492 && SSA_NAME_DEF_STMT (op1) == phi)
2493 code = PLUS_EXPR;
2495 if (check_reduction
2496 && (!commutative_tree_code (code) || !associative_tree_code (code)))
2498 if (dump_enabled_p ())
2499 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2500 "reduction: not commutative/associative: ");
2501 return NULL;
2504 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2506 if (code != COND_EXPR)
2508 if (dump_enabled_p ())
2509 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2510 "reduction: not binary operation: ");
2512 return NULL;
2515 op3 = gimple_assign_rhs1 (def_stmt);
2516 if (COMPARISON_CLASS_P (op3))
2518 op4 = TREE_OPERAND (op3, 1);
2519 op3 = TREE_OPERAND (op3, 0);
2522 op1 = gimple_assign_rhs2 (def_stmt);
2523 op2 = gimple_assign_rhs3 (def_stmt);
2525 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2527 if (dump_enabled_p ())
2528 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2529 "reduction: uses not ssa_names: ");
2531 return NULL;
2534 else
2536 op1 = gimple_assign_rhs1 (def_stmt);
2537 op2 = gimple_assign_rhs2 (def_stmt);
2539 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2541 if (dump_enabled_p ())
2542 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2543 "reduction: uses not ssa_names: ");
2545 return NULL;
2549 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2550 if ((TREE_CODE (op1) == SSA_NAME
2551 && !types_compatible_p (type,TREE_TYPE (op1)))
2552 || (TREE_CODE (op2) == SSA_NAME
2553 && !types_compatible_p (type, TREE_TYPE (op2)))
2554 || (op3 && TREE_CODE (op3) == SSA_NAME
2555 && !types_compatible_p (type, TREE_TYPE (op3)))
2556 || (op4 && TREE_CODE (op4) == SSA_NAME
2557 && !types_compatible_p (type, TREE_TYPE (op4))))
2559 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_NOTE, vect_location,
2562 "reduction: multiple types: operation type: ");
2563 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2564 dump_printf (MSG_NOTE, ", operands types: ");
2565 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2566 TREE_TYPE (op1));
2567 dump_printf (MSG_NOTE, ",");
2568 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2569 TREE_TYPE (op2));
2570 if (op3)
2572 dump_printf (MSG_NOTE, ",");
2573 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2574 TREE_TYPE (op3));
2577 if (op4)
2579 dump_printf (MSG_NOTE, ",");
2580 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2581 TREE_TYPE (op4));
2583 dump_printf (MSG_NOTE, "\n");
2586 return NULL;
2589 /* Check that it's ok to change the order of the computation.
2590 Generally, when vectorizing a reduction we change the order of the
2591 computation. This may change the behavior of the program in some
2592 cases, so we need to check that this is ok. One exception is when
2593 vectorizing an outer-loop: the inner-loop is executed sequentially,
2594 and therefore vectorizing reductions in the inner-loop during
2595 outer-loop vectorization is safe. */
2597 /* CHECKME: check for !flag_finite_math_only too? */
2598 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2599 && check_reduction)
2601 /* Changing the order of operations changes the semantics. */
2602 if (dump_enabled_p ())
2603 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2604 "reduction: unsafe fp math optimization: ");
2605 return NULL;
2607 else if (INTEGRAL_TYPE_P (type) && check_reduction)
2609 if (!operation_no_trapping_overflow (type, code))
2611 /* Changing the order of operations changes the semantics. */
2612 if (dump_enabled_p ())
2613 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2614 "reduction: unsafe int math optimization"
2615 " (overflow traps): ");
2616 return NULL;
2618 if (need_wrapping_integral_overflow
2619 && !TYPE_OVERFLOW_WRAPS (type)
2620 && operation_can_overflow (code))
2622 /* Changing the order of operations changes the semantics. */
2623 if (dump_enabled_p ())
2624 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2625 "reduction: unsafe int math optimization"
2626 " (overflow doesn't wrap): ");
2627 return NULL;
2630 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2632 /* Changing the order of operations changes the semantics. */
2633 if (dump_enabled_p ())
2634 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2635 "reduction: unsafe fixed-point math optimization: ");
2636 return NULL;
2639 /* If we detected "res -= x[i]" earlier, rewrite it into
2640 "res += -x[i]" now. If this turns out to be useless reassoc
2641 will clean it up again. */
2642 if (orig_code == MINUS_EXPR)
2644 tree rhs = gimple_assign_rhs2 (def_stmt);
2645 tree negrhs = make_ssa_name (TREE_TYPE (rhs));
2646 gimple *negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
2647 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2648 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2649 loop_info, NULL));
2650 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2651 gimple_assign_set_rhs2 (def_stmt, negrhs);
2652 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2653 update_stmt (def_stmt);
2656 /* Reduction is safe. We're dealing with one of the following:
2657 1) integer arithmetic and no trapv
2658 2) floating point arithmetic, and special flags permit this optimization
2659 3) nested cycle (i.e., outer loop vectorization). */
2660 if (TREE_CODE (op1) == SSA_NAME)
2661 def1 = SSA_NAME_DEF_STMT (op1);
2663 if (TREE_CODE (op2) == SSA_NAME)
2664 def2 = SSA_NAME_DEF_STMT (op2);
2666 if (code != COND_EXPR
2667 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2669 if (dump_enabled_p ())
2670 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2671 return NULL;
2674 /* Check that one def is the reduction def, defined by PHI,
2675 the other def is either defined in the loop ("vect_internal_def"),
2676 or it's an induction (defined by a loop-header phi-node). */
2678 if (def2 && def2 == phi
2679 && (code == COND_EXPR
2680 || !def1 || gimple_nop_p (def1)
2681 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2682 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2683 && (is_gimple_assign (def1)
2684 || is_gimple_call (def1)
2685 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2686 == vect_induction_def
2687 || (gimple_code (def1) == GIMPLE_PHI
2688 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2689 == vect_internal_def
2690 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2692 if (dump_enabled_p ())
2693 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2694 return def_stmt;
2697 if (def1 && def1 == phi
2698 && (code == COND_EXPR
2699 || !def2 || gimple_nop_p (def2)
2700 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2701 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2702 && (is_gimple_assign (def2)
2703 || is_gimple_call (def2)
2704 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2705 == vect_induction_def
2706 || (gimple_code (def2) == GIMPLE_PHI
2707 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2708 == vect_internal_def
2709 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2711 if (check_reduction)
2713 /* Swap operands (just for simplicity - so that the rest of the code
2714 can assume that the reduction variable is always the last (second)
2715 argument). */
2716 if (dump_enabled_p ())
2717 report_vect_op (MSG_NOTE, def_stmt,
2718 "detected reduction: need to swap operands: ");
2720 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2721 gimple_assign_rhs2_ptr (def_stmt));
2723 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2724 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2726 else
2728 if (dump_enabled_p ())
2729 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2732 return def_stmt;
2735 /* Try to find SLP reduction chain. */
2736 if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2738 if (dump_enabled_p ())
2739 report_vect_op (MSG_NOTE, def_stmt,
2740 "reduction: detected reduction chain: ");
2742 return def_stmt;
2745 if (dump_enabled_p ())
2746 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2747 "reduction: unknown pattern: ");
2749 return NULL;
2752 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2753 in-place. Arguments as there. */
2755 static gimple *
2756 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
2757 bool check_reduction, bool *double_reduc,
2758 bool need_wrapping_integral_overflow)
2760 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2761 double_reduc, false,
2762 need_wrapping_integral_overflow);
2765 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2766 in-place if it enables detection of more reductions. Arguments
2767 as there. */
2769 gimple *
2770 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
2771 bool check_reduction, bool *double_reduc,
2772 bool need_wrapping_integral_overflow)
2774 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2775 double_reduc, true,
2776 need_wrapping_integral_overflow);
2779 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2780 int
2781 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2782 int *peel_iters_epilogue,
2783 stmt_vector_for_cost *scalar_cost_vec,
2784 stmt_vector_for_cost *prologue_cost_vec,
2785 stmt_vector_for_cost *epilogue_cost_vec)
2787 int retval = 0;
2788 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2790 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2792 *peel_iters_epilogue = vf/2;
2793 if (dump_enabled_p ())
2794 dump_printf_loc (MSG_NOTE, vect_location,
2795 "cost model: epilogue peel iters set to vf/2 "
2796 "because loop iterations are unknown .\n");
2798 /* If peeled iterations are known but the number of scalar loop
2799 iterations is unknown, count a taken branch per peeled loop. */
2800 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2801 NULL, 0, vect_prologue);
2802 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2803 NULL, 0, vect_epilogue);
2805 else
2807 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2808 peel_iters_prologue = niters < peel_iters_prologue ?
2809 niters : peel_iters_prologue;
2810 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2811 /* If we need to peel for gaps, but no peeling is required, we have to
2812 peel VF iterations. */
2813 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2814 *peel_iters_epilogue = vf;
2817 stmt_info_for_cost *si;
2818 int j;
2819 if (peel_iters_prologue)
2820 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2821 retval += record_stmt_cost (prologue_cost_vec,
2822 si->count * peel_iters_prologue,
2823 si->kind, NULL, si->misalign,
2824 vect_prologue);
2825 if (*peel_iters_epilogue)
2826 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2827 retval += record_stmt_cost (epilogue_cost_vec,
2828 si->count * *peel_iters_epilogue,
2829 si->kind, NULL, si->misalign,
2830 vect_epilogue);
2832 return retval;
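/* Worked example with illustrative numbers: with VF = 8, a known
   iteration count of 100 and PEEL_ITERS_PROLOGUE = 3, the epilogue gets
   (100 - 3) % 8 = 1 iteration, and both counts scale the per-statement
   entries of SCALAR_COST_VEC.  If the iteration count is unknown, the
   epilogue is assumed to run vf/2 = 4 iterations and a taken branch is
   charged for each of the two peel loops.  */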
2835 /* Function vect_estimate_min_profitable_iters
2837 Return the number of iterations required for the vector version of the
2838 loop to be profitable relative to the cost of the scalar version of the
2839 loop. */
2841 static void
2842 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2843 int *ret_min_profitable_niters,
2844 int *ret_min_profitable_estimate)
2846 int min_profitable_iters;
2847 int min_profitable_estimate;
2848 int peel_iters_prologue;
2849 int peel_iters_epilogue;
2850 unsigned vec_inside_cost = 0;
2851 int vec_outside_cost = 0;
2852 unsigned vec_prologue_cost = 0;
2853 unsigned vec_epilogue_cost = 0;
2854 int scalar_single_iter_cost = 0;
2855 int scalar_outside_cost = 0;
2856 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2857 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2858 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2860 /* Cost model disabled. */
2861 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2863 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2864 *ret_min_profitable_niters = 0;
2865 *ret_min_profitable_estimate = 0;
2866 return;
2869 /* Requires loop versioning tests to handle misalignment. */
2870 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2872 /* FIXME: Make cost depend on complexity of individual check. */
2873 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2874 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2875 vect_prologue);
2876 dump_printf (MSG_NOTE,
2877 "cost model: Adding cost of checks for loop "
2878 "versioning to treat misalignment.\n");
2881 /* Requires loop versioning with alias checks. */
2882 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2884 /* FIXME: Make cost depend on complexity of individual check. */
2885 unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2886 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2887 vect_prologue);
2888 dump_printf (MSG_NOTE,
2889 "cost model: Adding cost of checks for loop "
2890 "versioning aliasing.\n");
2893 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2894 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2895 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2896 vect_prologue);
2898 /* Count statements in scalar loop. Using this as scalar cost for a single
2899 iteration for now.
2901 TODO: Add outer loop support.
2903 TODO: Consider assigning different costs to different scalar
2904 statements. */
2906 scalar_single_iter_cost
2907 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
2909 /* Add additional cost for the peeled instructions in prologue and epilogue
2910 loop.
2912 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2913 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2915 TODO: Build an expression that represents peel_iters for prologue and
2916 epilogue to be used in a run-time test. */
2918 if (npeel < 0)
2920 peel_iters_prologue = vf/2;
2921 dump_printf (MSG_NOTE, "cost model: "
2922 "prologue peel iters set to vf/2.\n");
2924 /* If peeling for alignment is unknown, the loop bound of the main loop
2925 becomes unknown. */
2926 peel_iters_epilogue = vf/2;
2927 dump_printf (MSG_NOTE, "cost model: "
2928 "epilogue peel iters set to vf/2 because "
2929 "peeling for alignment is unknown.\n");
2931 /* If peeled iterations are unknown, count a taken branch and a not taken
2932 branch per peeled loop. Even if scalar loop iterations are known,
2933 vector iterations are not known since peeled prologue iterations are
2934 not known. Hence guards remain the same. */
2935 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2936 NULL, 0, vect_prologue);
2937 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2938 NULL, 0, vect_prologue);
2939 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2940 NULL, 0, vect_epilogue);
2941 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2942 NULL, 0, vect_epilogue);
2943 stmt_info_for_cost *si;
2944 int j;
2945 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
2947 struct _stmt_vec_info *stmt_info
2948 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2949 (void) add_stmt_cost (target_cost_data,
2950 si->count * peel_iters_prologue,
2951 si->kind, stmt_info, si->misalign,
2952 vect_prologue);
2953 (void) add_stmt_cost (target_cost_data,
2954 si->count * peel_iters_epilogue,
2955 si->kind, stmt_info, si->misalign,
2956 vect_epilogue);
2959 else
2961 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2962 stmt_info_for_cost *si;
2963 int j;
2964 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2966 prologue_cost_vec.create (2);
2967 epilogue_cost_vec.create (2);
2968 peel_iters_prologue = npeel;
2970 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2971 &peel_iters_epilogue,
2972 &LOOP_VINFO_SCALAR_ITERATION_COST
2973 (loop_vinfo),
2974 &prologue_cost_vec,
2975 &epilogue_cost_vec);
2977 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2979 struct _stmt_vec_info *stmt_info
2980 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2981 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2982 si->misalign, vect_prologue);
2985 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2987 struct _stmt_vec_info *stmt_info
2988 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2989 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2990 si->misalign, vect_epilogue);
2993 prologue_cost_vec.release ();
2994 epilogue_cost_vec.release ();
2997 /* FORNOW: The scalar outside cost is incremented in one of the
2998 following ways:
3000 1. The vectorizer checks for alignment and aliasing and generates
3001 a condition that allows dynamic vectorization. A cost model
3002 check is ANDED with the versioning condition. Hence scalar code
3003 path now has the added cost of the versioning check.
3005 if (cost > th & versioning_check)
3006 jmp to vector code
3008 Hence run-time scalar is incremented by not-taken branch cost.
3010 2. The vectorizer then checks if a prologue is required. If the
3011 cost model check was not done before during versioning, it has to
3012 be done before the prologue check.
3014 if (cost <= th)
3015 prologue = scalar_iters
3016 if (prologue == 0)
3017 jmp to vector code
3018 else
3019 execute prologue
3020 if (prologue == num_iters)
3021 go to exit
3023 Hence the run-time scalar cost is incremented by a taken branch,
3024 plus a not-taken branch, plus a taken branch cost.
3026 3. The vectorizer then checks if an epilogue is required. If the
3027 cost model check was not done before during prologue check, it
3028 has to be done with the epilogue check.
3030 if (prologue == 0)
3031 jmp to vector code
3032 else
3033 execute prologue
3034 if (prologue == num_iters)
3035 go to exit
3036 vector code:
3037 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3038 jmp to epilogue
3040 Hence the run-time scalar cost should be incremented by 2 taken
3041 branches.
3043 TODO: The back end may reorder the BBs differently and reverse
3044 conditions/branch directions. Change the estimates below to
3045 something more reasonable. */
3047 /* If the number of iterations is known and we do not do versioning, we can
3048 decide whether to vectorize at compile time. Hence the scalar version
3049 does not carry cost model guard costs. */
3050 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3051 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3052 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3054 /* Cost model check occurs at versioning. */
3055 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3056 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3057 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3058 else
3060 /* Cost model check occurs at prologue generation. */
3061 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3062 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3063 + vect_get_stmt_cost (cond_branch_not_taken);
3064 /* Cost model check occurs at epilogue generation. */
3065 else
3066 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3070 /* Complete the target-specific cost calculations. */
3071 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3072 &vec_inside_cost, &vec_epilogue_cost);
3074 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3076 if (dump_enabled_p ())
3078 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3079 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3080 vec_inside_cost);
3081 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3082 vec_prologue_cost);
3083 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3084 vec_epilogue_cost);
3085 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3086 scalar_single_iter_cost);
3087 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3088 scalar_outside_cost);
3089 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3090 vec_outside_cost);
3091 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3092 peel_iters_prologue);
3093 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3094 peel_iters_epilogue);
3097 /* Calculate number of iterations required to make the vector version
3098 profitable, relative to the loop bodies only. The following condition
3099 must hold true:
3100 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3101 where
3102 SIC = scalar iteration cost, VIC = vector iteration cost,
3103 VOC = vector outside cost, VF = vectorization factor,
3104 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3105 SOC = scalar outside cost for run time cost model check. */
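/* Worked example with made-up costs: SIC = 4, VIC = 8, VF = 8, VOC = 20
   and SOC = PL_ITERS = EP_ITERS = 0.  The computation below yields
   (20 * 8) / (4 * 8 - 8) = 160 / 24 = 6, which is then bumped to 7
   because 4 * 8 * 6 <= 8 * 6 + 20 * 8; i.e. the vector loop starts to
   win after about 7 iterations, before the clamping to VF further
   below.  */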
3107 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3109 if (vec_outside_cost <= 0)
3110 min_profitable_iters = 1;
3111 else
3113 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3114 - vec_inside_cost * peel_iters_prologue
3115 - vec_inside_cost * peel_iters_epilogue)
3116 / ((scalar_single_iter_cost * vf)
3117 - vec_inside_cost);
3119 if ((scalar_single_iter_cost * vf * min_profitable_iters)
3120 <= (((int) vec_inside_cost * min_profitable_iters)
3121 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3122 min_profitable_iters++;
3125 /* vector version will never be profitable. */
3126 else
3128 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3129 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3130 "did not happen for a simd loop");
3132 if (dump_enabled_p ())
3133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3134 "cost model: the vector iteration cost = %d "
3135 "divided by the scalar iteration cost = %d "
3136 "is greater or equal to the vectorization factor = %d"
3137 ".\n",
3138 vec_inside_cost, scalar_single_iter_cost, vf);
3139 *ret_min_profitable_niters = -1;
3140 *ret_min_profitable_estimate = -1;
3141 return;
3144 dump_printf (MSG_NOTE,
3145 " Calculated minimum iters for profitability: %d\n",
3146 min_profitable_iters);
3148 min_profitable_iters =
3149 min_profitable_iters < vf ? vf : min_profitable_iters;
3151 /* Because the condition we create is:
3152 if (niters <= min_profitable_iters)
3153 then skip the vectorized loop. */
3154 min_profitable_iters--;
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_NOTE, vect_location,
3158 " Runtime profitability threshold = %d\n",
3159 min_profitable_iters);
3161 *ret_min_profitable_niters = min_profitable_iters;
3163 /* Calculate number of iterations required to make the vector version
3164 profitable, relative to the loop bodies only.
3166 Non-vectorized variant is SIC * niters and it must win over vector
3167 variant on the expected loop trip count. The following condition must hold true:
3168 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3170 if (vec_outside_cost <= 0)
3171 min_profitable_estimate = 1;
3172 else
3174 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3175 - vec_inside_cost * peel_iters_prologue
3176 - vec_inside_cost * peel_iters_epilogue)
3177 / ((scalar_single_iter_cost * vf)
3178 - vec_inside_cost);
3180 min_profitable_estimate--;
3181 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3182 if (dump_enabled_p ())
3183 dump_printf_loc (MSG_NOTE, vect_location,
3184 " Static estimate profitability threshold = %d\n",
3185 min_profitable_estimate);
3187 *ret_min_profitable_estimate = min_profitable_estimate;
3190 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3191 vector elements (not bits) for a vector of mode MODE. */
3192 static void
3193 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3194 unsigned char *sel)
3196 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3198 for (i = 0; i < nelt; i++)
3199 sel[i] = (i + offset) & (2*nelt - 1);
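/* For instance (illustrative): with a 4-element vector mode and
   OFFSET = 2 this yields sel = {2, 3, 4, 5}, a permutation taking the
   upper two elements of the first input and the lower two of the
   second, i.e. a whole-vector shift by two elements.  */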
3202 /* Checks whether the target supports whole-vector shifts for vectors of mode
3203 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3204 it supports vec_perm_const with masks for all necessary shift amounts. */
3205 static bool
3206 have_whole_vector_shift (enum machine_mode mode)
3208 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3209 return true;
3211 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3212 return false;
3214 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3215 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3217 for (i = nelt/2; i >= 1; i/=2)
3219 calc_vec_perm_mask_for_shift (mode, i, sel);
3220 if (!can_vec_perm_p (mode, false, sel))
3221 return false;
3223 return true;
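/* E.g. for an 8-element vector mode the loop above asks for permutation
   masks implementing shifts by 4, 2 and 1 elements -- exactly the shift
   amounts a log2-style reduction epilogue needs.  */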
3226 /* Return the reduction operand (with index REDUC_INDEX) of STMT. */
3228 static tree
3229 get_reduction_op (gimple *stmt, int reduc_index)
3231 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3233 case GIMPLE_SINGLE_RHS:
3234 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3235 == ternary_op);
3236 return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3237 case GIMPLE_UNARY_RHS:
3238 return gimple_assign_rhs1 (stmt);
3239 case GIMPLE_BINARY_RHS:
3240 return (reduc_index
3241 ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3242 case GIMPLE_TERNARY_RHS:
3243 return gimple_op (stmt, reduc_index + 1);
3244 default:
3245 gcc_unreachable ();
3249 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3250 functions. Design better to avoid maintenance issues. */
3252 /* Function vect_model_reduction_cost.
3254 Models cost for a reduction operation, including the vector ops
3255 generated within the strip-mine loop, the initial definition before
3256 the loop, and the epilogue code that must be generated. */
3258 static bool
3259 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3260 int ncopies, int reduc_index)
3262 int prologue_cost = 0, epilogue_cost = 0;
3263 enum tree_code code;
3264 optab optab;
3265 tree vectype;
3266 gimple *stmt, *orig_stmt;
3267 tree reduction_op;
3268 machine_mode mode;
3269 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3270 struct loop *loop = NULL;
3271 void *target_cost_data;
3273 if (loop_vinfo)
3275 loop = LOOP_VINFO_LOOP (loop_vinfo);
3276 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3278 else
3279 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3281 /* Cost of reduction op inside loop. */
3282 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3283 stmt_info, 0, vect_body);
3284 stmt = STMT_VINFO_STMT (stmt_info);
3286 reduction_op = get_reduction_op (stmt, reduc_index);
3288 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3289 if (!vectype)
3291 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3294 "unsupported data-type ");
3295 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3296 TREE_TYPE (reduction_op));
3297 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3299 return false;
3302 mode = TYPE_MODE (vectype);
3303 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3305 if (!orig_stmt)
3306 orig_stmt = STMT_VINFO_STMT (stmt_info);
3308 code = gimple_assign_rhs_code (orig_stmt);
3310 /* Add in cost for initial definition. */
3311 prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3312 stmt_info, 0, vect_prologue);
3314 /* Determine cost of epilogue code.
3316 We have a reduction operator that will reduce the vector in one statement.
3317 Also requires scalar extract. */
3319 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3321 if (reduc_code != ERROR_MARK)
3323 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3324 stmt_info, 0, vect_epilogue);
3325 epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3326 stmt_info, 0, vect_epilogue);
3328 else
3330 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3331 tree bitsize =
3332 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3333 int element_bitsize = tree_to_uhwi (bitsize);
3334 int nelements = vec_size_in_bits / element_bitsize;
3336 optab = optab_for_tree_code (code, vectype, optab_default);
3338 /* We have a whole vector shift available. */
3339 if (VECTOR_MODE_P (mode)
3340 && optab_handler (optab, mode) != CODE_FOR_nothing
3341 && have_whole_vector_shift (mode))
3343 /* Final reduction via vector shifts and the reduction operator.
3344 Also requires scalar extract. */
3345 epilogue_cost += add_stmt_cost (target_cost_data,
3346 exact_log2 (nelements) * 2,
3347 vector_stmt, stmt_info, 0,
3348 vect_epilogue);
3349 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3350 vec_to_scalar, stmt_info, 0,
3351 vect_epilogue);
3353 else
3354 /* Use extracts and reduction op for final reduction. For N
3355 elements, we have N extracts and N-1 reduction ops. */
3356 epilogue_cost += add_stmt_cost (target_cost_data,
3357 nelements + nelements - 1,
3358 vector_stmt, stmt_info, 0,
3359 vect_epilogue);
3363 if (dump_enabled_p ())
3364 dump_printf (MSG_NOTE,
3365 "vect_model_reduction_cost: inside_cost = %d, "
3366 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3367 prologue_cost, epilogue_cost);
3369 return true;
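/* Worked example (illustrative): for a 4-element vector accumulator with
   no direct reduc_code, the shift-based path costs exact_log2 (4) * 2 = 4
   vector statements (two shift/op rounds) plus one vec_to_scalar extract;
   without whole-vector shifts it costs 4 + 4 - 1 = 7 statements instead
   (four extracts and three scalar ops).  */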
3373 /* Function vect_model_induction_cost.
3375 Models cost for induction operations. */
3377 static void
3378 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3380 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3381 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3382 unsigned inside_cost, prologue_cost;
3384 /* loop cost for vec_loop. */
3385 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3386 stmt_info, 0, vect_body);
3388 /* prologue cost for vec_init and vec_step. */
3389 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3390 stmt_info, 0, vect_prologue);
3392 if (dump_enabled_p ())
3393 dump_printf_loc (MSG_NOTE, vect_location,
3394 "vect_model_induction_cost: inside_cost = %d, "
3395 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3399 /* Function get_initial_def_for_induction
3401 Input:
3402 STMT - a stmt that performs an induction operation in the loop.
3403 IV_PHI - the initial value of the induction variable
3405 Output:
3406 Return a vector variable, initialized with the first VF values of
3407 the induction variable. E.g., for an iv with IV_PHI='X' and
3408 evolution S, for a vector of 4 units, we want to return:
3409 [X, X + S, X + 2*S, X + 3*S]. */
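/* A concrete (hypothetical) instance: for an iv with X = x0 and step
   S = 3, vectorized with VF = 4, this builds vec_init = {x0, x0+3, x0+6,
   x0+9} in the preheader, a vec_step of {12, 12, 12, 12} (VF * S), and a
   loop phi that adds vec_step on every iteration of the vector loop.  */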
3411 static tree
3412 get_initial_def_for_induction (gimple *iv_phi)
3414 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3417 tree vectype;
3418 int nunits;
3419 edge pe = loop_preheader_edge (loop);
3420 struct loop *iv_loop;
3421 basic_block new_bb;
3422 tree new_vec, vec_init, vec_step, t;
3423 tree new_var;
3424 tree new_name;
3425 gimple *init_stmt, *new_stmt;
3426 gphi *induction_phi;
3427 tree induc_def, vec_def, vec_dest;
3428 tree init_expr, step_expr;
3429 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3430 int i;
3431 int ncopies;
3432 tree expr;
3433 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3434 bool nested_in_vect_loop = false;
3435 gimple_seq stmts = NULL;
3436 imm_use_iterator imm_iter;
3437 use_operand_p use_p;
3438 gimple *exit_phi;
3439 edge latch_e;
3440 tree loop_arg;
3441 gimple_stmt_iterator si;
3442 basic_block bb = gimple_bb (iv_phi);
3443 tree stepvectype;
3444 tree resvectype;
3446 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3447 if (nested_in_vect_loop_p (loop, iv_phi))
3449 nested_in_vect_loop = true;
3450 iv_loop = loop->inner;
3452 else
3453 iv_loop = loop;
3454 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3456 latch_e = loop_latch_edge (iv_loop);
3457 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3459 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3460 gcc_assert (step_expr != NULL_TREE);
3462 pe = loop_preheader_edge (iv_loop);
3463 init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3464 loop_preheader_edge (iv_loop));
3466 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3467 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3468 gcc_assert (vectype);
3469 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3470 ncopies = vf / nunits;
3472 gcc_assert (phi_info);
3473 gcc_assert (ncopies >= 1);
3475 /* Convert the step to the desired type. */
3476 step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3477 step_expr),
3478 &stmts, true, NULL_TREE);
3479 if (stmts)
3481 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3482 gcc_assert (!new_bb);
3485 /* Find the first insertion point in the BB. */
3486 si = gsi_after_labels (bb);
3488 /* Create the vector that holds the initial_value of the induction. */
3489 if (nested_in_vect_loop)
3491 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3492 been created during vectorization of previous stmts. We obtain it
3493 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3494 vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
3495 /* If the initial value is not of proper type, convert it. */
3496 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3498 new_stmt
3499 = gimple_build_assign (vect_get_new_vect_var (vectype,
3500 vect_simple_var,
3501 "vec_iv_"),
3502 VIEW_CONVERT_EXPR,
3503 build1 (VIEW_CONVERT_EXPR, vectype,
3504 vec_init));
3505 vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3506 gimple_assign_set_lhs (new_stmt, vec_init);
3507 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3508 new_stmt);
3509 gcc_assert (!new_bb);
3510 set_vinfo_for_stmt (new_stmt,
3511 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3514 else
3516 vec<constructor_elt, va_gc> *v;
3518 /* iv_loop is the loop to be vectorized. Create:
3519 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3520 new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3521 vect_scalar_var, "var_");
3522 new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3523 init_expr),
3524 &stmts, false, new_var);
3525 if (stmts)
3527 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3528 gcc_assert (!new_bb);
3531 vec_alloc (v, nunits);
3532 bool constant_p = is_gimple_min_invariant (new_name);
3533 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3534 for (i = 1; i < nunits; i++)
3536 /* Create: new_name_i = new_name + step_expr */
3537 new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3538 new_name, step_expr);
3539 if (!is_gimple_min_invariant (new_name))
3541 init_stmt = gimple_build_assign (new_var, new_name);
3542 new_name = make_ssa_name (new_var, init_stmt);
3543 gimple_assign_set_lhs (init_stmt, new_name);
3544 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3545 gcc_assert (!new_bb);
3546 if (dump_enabled_p ())
3548 dump_printf_loc (MSG_NOTE, vect_location,
3549 "created new init_stmt: ");
3550 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3551 dump_printf (MSG_NOTE, "\n");
3553 constant_p = false;
3555 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3557 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3558 if (constant_p)
3559 new_vec = build_vector_from_ctor (vectype, v);
3560 else
3561 new_vec = build_constructor (vectype, v);
3562 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3566 /* Create the vector that holds the step of the induction. */
3567 if (nested_in_vect_loop)
3568 /* iv_loop is nested in the loop to be vectorized. Generate:
3569 vec_step = [S, S, S, S] */
3570 new_name = step_expr;
3571 else
3573 /* iv_loop is the loop to be vectorized. Generate:
3574 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3575 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3577 expr = build_int_cst (integer_type_node, vf);
3578 expr = fold_convert (TREE_TYPE (step_expr), expr);
3580 else
3581 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3582 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3583 expr, step_expr);
3584 if (TREE_CODE (step_expr) == SSA_NAME)
3585 new_name = vect_init_vector (iv_phi, new_name,
3586 TREE_TYPE (step_expr), NULL);
3589 t = unshare_expr (new_name);
3590 gcc_assert (CONSTANT_CLASS_P (new_name)
3591 || TREE_CODE (new_name) == SSA_NAME);
3592 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3593 gcc_assert (stepvectype);
3594 new_vec = build_vector_from_val (stepvectype, t);
3595 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3598 /* Create the following def-use cycle:
3599 loop prolog:
3600 vec_init = ...
3601 vec_step = ...
3602 loop:
3603 vec_iv = PHI <vec_init, vec_loop>
3605 STMT
3607 vec_loop = vec_iv + vec_step; */
3609 /* Create the induction-phi that defines the induction-operand. */
3610 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3611 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3612 set_vinfo_for_stmt (induction_phi,
3613 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3614 induc_def = PHI_RESULT (induction_phi);
3616 /* Create the iv update inside the loop */
3617 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3618 vec_def = make_ssa_name (vec_dest, new_stmt);
3619 gimple_assign_set_lhs (new_stmt, vec_def);
3620 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3621 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3622 NULL));
3624 /* Set the arguments of the phi node: */
3625 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3626 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3627 UNKNOWN_LOCATION);
3630 /* In case that vectorization factor (VF) is bigger than the number
3631 of elements that we can fit in a vectype (nunits), we have to generate
3632 more than one vector stmt - i.e., we need to "unroll" the
3633 vector stmt by a factor VF/nunits. For more details see documentation
3634 in vectorizable_operation. */
3636 if (ncopies > 1)
3638 stmt_vec_info prev_stmt_vinfo;
3639 /* FORNOW. This restriction should be relaxed. */
3640 gcc_assert (!nested_in_vect_loop);
3642 /* Create the vector that holds the step of the induction. */
3643 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3645 expr = build_int_cst (integer_type_node, nunits);
3646 expr = fold_convert (TREE_TYPE (step_expr), expr);
3648 else
3649 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3650 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3651 expr, step_expr);
3652 if (TREE_CODE (step_expr) == SSA_NAME)
3653 new_name = vect_init_vector (iv_phi, new_name,
3654 TREE_TYPE (step_expr), NULL);
3655 t = unshare_expr (new_name);
3656 gcc_assert (CONSTANT_CLASS_P (new_name)
3657 || TREE_CODE (new_name) == SSA_NAME);
3658 new_vec = build_vector_from_val (stepvectype, t);
3659 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3661 vec_def = induc_def;
3662 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3663 for (i = 1; i < ncopies; i++)
3665 /* vec_i = vec_prev + vec_step */
3666 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3667 vec_def, vec_step);
3668 vec_def = make_ssa_name (vec_dest, new_stmt);
3669 gimple_assign_set_lhs (new_stmt, vec_def);
3671 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3672 if (!useless_type_conversion_p (resvectype, vectype))
3674 new_stmt
3675 = gimple_build_assign
3676 (vect_get_new_vect_var (resvectype, vect_simple_var,
3677 "vec_iv_"),
3678 VIEW_CONVERT_EXPR,
3679 build1 (VIEW_CONVERT_EXPR, resvectype,
3680 gimple_assign_lhs (new_stmt)));
3681 gimple_assign_set_lhs (new_stmt,
3682 make_ssa_name
3683 (gimple_assign_lhs (new_stmt), new_stmt));
3684 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3686 set_vinfo_for_stmt (new_stmt,
3687 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3688 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3689 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3693 if (nested_in_vect_loop)
3695 /* Find the loop-closed exit-phi of the induction, and record
3696 the final vector of induction results: */
3697 exit_phi = NULL;
3698 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3700 gimple *use_stmt = USE_STMT (use_p);
3701 if (is_gimple_debug (use_stmt))
3702 continue;
3704 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3706 exit_phi = use_stmt;
3707 break;
3710 if (exit_phi)
3712 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3713 /* FORNOW. Currently not supporting the case that an inner-loop induction
3714 is not used in the outer-loop (i.e. only outside the outer-loop). */
3715 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3716 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3718 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3719 if (dump_enabled_p ())
3721 dump_printf_loc (MSG_NOTE, vect_location,
3722 "vector of inductions after inner-loop:");
3723 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3724 dump_printf (MSG_NOTE, "\n");
3730 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_NOTE, vect_location,
3733 "transform induction: created def-use cycle: ");
3734 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3735 dump_printf (MSG_NOTE, "\n");
3736 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3737 SSA_NAME_DEF_STMT (vec_def), 0);
3738 dump_printf (MSG_NOTE, "\n");
3741 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3742 if (!useless_type_conversion_p (resvectype, vectype))
3744 new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3745 vect_simple_var,
3746 "vec_iv_"),
3747 VIEW_CONVERT_EXPR,
3748 build1 (VIEW_CONVERT_EXPR, resvectype,
3749 induc_def));
3750 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3751 gimple_assign_set_lhs (new_stmt, induc_def);
3752 si = gsi_after_labels (bb);
3753 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3754 set_vinfo_for_stmt (new_stmt,
3755 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3756 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3757 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3760 return induc_def;
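/* A minimal illustrative sketch (made up for exposition, not part of the
   vectorizer itself) of the lane values produced by the induction code
   above, assuming NUNITS == 4 and NCOPIES == 2 (so VF == 8) and a scalar
   IV x, x + S, x + 2*S, ...  Copy 0 holds lanes 0..3 and copy 1 holds
   lanes 4..7 of iteration ITER; the loop update adds VF*S to every lane
   of copy 0, while the extra copy is the previous copy plus NUNITS*S.
   The function and its arguments are hypothetical.  */

static void
vect_iv_lanes_example (int x, int s, int iter, int copy0[4], int copy1[4])
{
  for (int lane = 0; lane < 4; lane++)
    {
      copy0[lane] = x + (iter * 8 + lane) * s;      /* vec_iv (the PHI).   */
      copy1[lane] = x + (iter * 8 + 4 + lane) * s;  /* vec_iv + [4*S,...]  */
    }
}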
3764 /* Function get_initial_def_for_reduction
3766 Input:
3767 STMT - a stmt that performs a reduction operation in the loop.
3768 INIT_VAL - the initial value of the reduction variable
3770 Output:
3771 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3772 of the reduction (used for adjusting the epilog - see below).
3773 Return a vector variable, initialized according to the operation that STMT
3774 performs. This vector will be used as the initial value of the
3775 vector of partial results.
3777 Option1 (adjust in epilog): Initialize the vector as follows:
3778 add/bit or/xor: [0,0,...,0,0]
3779 mult/bit and: [1,1,...,1,1]
3780 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3781 and when necessary (e.g. add/mult case) let the caller know
3782 that it needs to adjust the result by init_val.
3784 Option2: Initialize the vector as follows:
3785 add/bit or/xor: [init_val,0,0,...,0]
3786 mult/bit and: [init_val,1,1,...,1]
3787 min/max/cond_expr: [init_val,init_val,...,init_val]
3788 and no adjustments are needed.
3790 For example, for the following code:
3792 s = init_val;
3793 for (i=0;i<n;i++)
3794 s = s + a[i];
3796 STMT is 's = s + a[i]', and the reduction variable is 's'.
3797 For a vector of 4 units, we want to return either [0,0,0,init_val],
3798 or [0,0,0,0] and let the caller know that it needs to adjust
3799 the result at the end by 'init_val'.
3801 FORNOW, we use Option1 (the 'adjust in epilog' scheme) when ADJUSTMENT_DEF
3802 is not NULL, because its initialization vector is simpler (the same element
3803 in all entries), and Option2 otherwise.
3805 A cost model should help decide between these two schemes. */
3807 tree
3808 get_initial_def_for_reduction (gimple *stmt, tree init_val,
3809 tree *adjustment_def)
3811 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3812 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3813 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3814 tree scalar_type = TREE_TYPE (init_val);
3815 tree vectype = get_vectype_for_scalar_type (scalar_type);
3816 int nunits;
3817 enum tree_code code = gimple_assign_rhs_code (stmt);
3818 tree def_for_init;
3819 tree init_def;
3820 tree *elts;
3821 int i;
3822 bool nested_in_vect_loop = false;
3823 tree init_value;
3824 REAL_VALUE_TYPE real_init_val = dconst0;
3825 int int_init_val = 0;
3826 gimple *def_stmt = NULL;
3828 gcc_assert (vectype);
3829 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3831 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3832 || SCALAR_FLOAT_TYPE_P (scalar_type));
3834 if (nested_in_vect_loop_p (loop, stmt))
3835 nested_in_vect_loop = true;
3836 else
3837 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3839 /* In case of double reduction we only create a vector variable to be put
3840 in the reduction phi node. The actual statement creation is done in
3841 vect_create_epilog_for_reduction. */
3842 if (adjustment_def && nested_in_vect_loop
3843 && TREE_CODE (init_val) == SSA_NAME
3844 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3845 && gimple_code (def_stmt) == GIMPLE_PHI
3846 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3847 && vinfo_for_stmt (def_stmt)
3848 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3849 == vect_double_reduction_def)
3851 *adjustment_def = NULL;
3852 return vect_create_destination_var (init_val, vectype);
3855 if (TREE_CONSTANT (init_val))
3857 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3858 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3859 else
3860 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3862 else
3863 init_value = init_val;
3865 switch (code)
3867 case WIDEN_SUM_EXPR:
3868 case DOT_PROD_EXPR:
3869 case SAD_EXPR:
3870 case PLUS_EXPR:
3871 case MINUS_EXPR:
3872 case BIT_IOR_EXPR:
3873 case BIT_XOR_EXPR:
3874 case MULT_EXPR:
3875 case BIT_AND_EXPR:
3876 /* ADJUSTMENT_DEF is NULL when called from
3877 vect_create_epilog_for_reduction to vectorize double reduction. */
3878 if (adjustment_def)
3880 if (nested_in_vect_loop)
3881 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3882 NULL);
3883 else
3884 *adjustment_def = init_val;
3887 if (code == MULT_EXPR)
3889 real_init_val = dconst1;
3890 int_init_val = 1;
3893 if (code == BIT_AND_EXPR)
3894 int_init_val = -1;
3896 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3897 def_for_init = build_real (scalar_type, real_init_val);
3898 else
3899 def_for_init = build_int_cst (scalar_type, int_init_val);
3901 /* Create a vector of '0' or '1' in all elements except the first. */
3902 elts = XALLOCAVEC (tree, nunits);
3903 for (i = nunits - 2; i >= 0; --i)
3904 elts[i + 1] = def_for_init;
3906 /* Option1: the first element is '0' or '1' as well. */
3907 if (adjustment_def)
3909 elts[0] = def_for_init;
3910 init_def = build_vector (vectype, elts);
3911 break;
3914 /* Option2: the first element is INIT_VAL. */
3915 elts[0] = init_val;
3916 if (TREE_CONSTANT (init_val))
3917 init_def = build_vector (vectype, elts);
3918 else
3920 vec<constructor_elt, va_gc> *v;
3921 vec_alloc (v, nunits);
3922 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3923 for (i = 1; i < nunits; ++i)
3924 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3925 init_def = build_constructor (vectype, v);
3928 break;
3930 case MIN_EXPR:
3931 case MAX_EXPR:
3932 case COND_EXPR:
3933 if (adjustment_def)
3935 *adjustment_def = NULL_TREE;
3936 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3937 break;
3940 init_def = build_vector_from_val (vectype, init_value);
3941 break;
3943 default:
3944 gcc_unreachable ();
3947 return init_def;
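/* A minimal illustrative sketch (made up for exposition, not used by the
   vectorizer) of Option1, the "adjust in epilog" scheme described above,
   for the example "s = init_val; for (i=0;i<n;i++) s = s + a[i];" with a
   4-element vector, assuming N is a multiple of 4: the partial sums start
   from the neutral element 0, and ADJUSTMENT_DEF (init_val) is added back
   once after the loop.  */

static int
reduc_adjust_in_epilog_example (const int *a, int n, int init_val)
{
  int partial[4] = { 0, 0, 0, 0 };     /* Option1 initial vector.       */
  for (int i = 0; i < n; i += 4)
    for (int lane = 0; lane < 4; lane++)
      partial[lane] += a[i + lane];    /* Vectorized loop body.         */
  int s = partial[0] + partial[1] + partial[2] + partial[3];
  return s + init_val;                 /* Adjust by ADJUSTMENT_DEF.     */
}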
3950 /* Function vect_create_epilog_for_reduction
3952 Create code at the loop-epilog to finalize the result of a reduction
3953 computation.
3955 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3956 reduction statements.
3957 STMT is the scalar reduction stmt that is being vectorized.
3958 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3959 number of elements that we can fit in a vectype (nunits). In this case
3960 we have to generate more than one vector stmt - i.e - we need to "unroll"
3961 the vector stmt by a factor VF/nunits. For more details see documentation
3962 in vectorizable_operation.
3963 REDUC_CODE is the tree-code for the epilog reduction.
3964 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3965 computation.
3966 REDUC_INDEX is the index of the operand in the right hand side of the
3967 statement that is defined by REDUCTION_PHI.
3968 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3969 SLP_NODE is an SLP node containing a group of reduction statements. The
3970 first one in this group is STMT.
3972 This function:
3973 1. Creates the reduction def-use cycles: sets the arguments for
3974 REDUCTION_PHIS:
3975 The loop-entry argument is the vectorized initial-value of the reduction.
3976 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3977 sums.
3978 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3979 by applying the operation specified by REDUC_CODE if available, or by
3980 other means (whole-vector shifts or a scalar loop).
3981 The function also creates a new phi node at the loop exit to preserve
3982 loop-closed form, as illustrated below.
3984 The flow at the entry to this function:
3986 loop:
3987 vec_def = phi <null, null> # REDUCTION_PHI
3988 VECT_DEF = vector_stmt # vectorized form of STMT
3989 s_loop = scalar_stmt # (scalar) STMT
3990 loop_exit:
3991 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3992 use <s_out0>
3993 use <s_out0>
3995 The above is transformed by this function into:
3997 loop:
3998 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3999 VECT_DEF = vector_stmt # vectorized form of STMT
4000 s_loop = scalar_stmt # (scalar) STMT
4001 loop_exit:
4002 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4003 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4004 v_out2 = reduce <v_out1>
4005 s_out3 = extract_field <v_out2, 0>
4006 s_out4 = adjust_result <s_out3>
4007 use <s_out4>
4008 use <s_out4>
4011 static void
4012 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
4013 int ncopies, enum tree_code reduc_code,
4014 vec<gimple *> reduction_phis,
4015 int reduc_index, bool double_reduc,
4016 slp_tree slp_node)
4018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4019 stmt_vec_info prev_phi_info;
4020 tree vectype;
4021 machine_mode mode;
4022 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4023 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4024 basic_block exit_bb;
4025 tree scalar_dest;
4026 tree scalar_type;
4027 gimple *new_phi = NULL, *phi;
4028 gimple_stmt_iterator exit_gsi;
4029 tree vec_dest;
4030 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4031 gimple *epilog_stmt = NULL;
4032 enum tree_code code = gimple_assign_rhs_code (stmt);
4033 gimple *exit_phi;
4034 tree bitsize;
4035 tree adjustment_def = NULL;
4036 tree vec_initial_def = NULL;
4037 tree reduction_op, expr, def;
4038 tree orig_name, scalar_result;
4039 imm_use_iterator imm_iter, phi_imm_iter;
4040 use_operand_p use_p, phi_use_p;
4041 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL;
4042 bool nested_in_vect_loop = false;
4043 auto_vec<gimple *> new_phis;
4044 auto_vec<gimple *> inner_phis;
4045 enum vect_def_type dt = vect_unknown_def_type;
4046 int j, i;
4047 auto_vec<tree> scalar_results;
4048 unsigned int group_size = 1, k, ratio;
4049 auto_vec<tree> vec_initial_defs;
4050 auto_vec<gimple *> phis;
4051 bool slp_reduc = false;
4052 tree new_phi_result;
4053 gimple *inner_phi = NULL;
4055 if (slp_node)
4056 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4058 if (nested_in_vect_loop_p (loop, stmt))
4060 outer_loop = loop;
4061 loop = loop->inner;
4062 nested_in_vect_loop = true;
4063 gcc_assert (!slp_node);
4066 reduction_op = get_reduction_op (stmt, reduc_index);
4068 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4069 gcc_assert (vectype);
4070 mode = TYPE_MODE (vectype);
4072 /* 1. Create the reduction def-use cycle:
4073 Set the arguments of REDUCTION_PHIS, i.e., transform
4075 loop:
4076 vec_def = phi <null, null> # REDUCTION_PHI
4077 VECT_DEF = vector_stmt # vectorized form of STMT
4080 into:
4082 loop:
4083 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4084 VECT_DEF = vector_stmt # vectorized form of STMT
4087 (in case of SLP, do it for all the phis). */
4089 /* Get the loop-entry arguments. */
4090 if (slp_node)
4091 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4092 NULL, slp_node, reduc_index);
4093 else
4095 vec_initial_defs.create (1);
4096 /* For the case of reduction, vect_get_vec_def_for_operand returns
4097 the scalar def before the loop, that defines the initial value
4098 of the reduction variable. */
4099 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
4100 &adjustment_def);
4101 vec_initial_defs.quick_push (vec_initial_def);
4104 /* Set phi nodes arguments. */
4105 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4107 tree vec_init_def, def;
4108 gimple_seq stmts;
4109 vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4110 true, NULL_TREE);
4111 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4112 def = vect_defs[i];
4113 for (j = 0; j < ncopies; j++)
4115 /* Set the loop-entry arg of the reduction-phi. */
4116 add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4117 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4119 /* Set the loop-latch arg for the reduction-phi. */
4120 if (j > 0)
4121 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4123 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4124 UNKNOWN_LOCATION);
4126 if (dump_enabled_p ())
4128 dump_printf_loc (MSG_NOTE, vect_location,
4129 "transform reduction: created def-use cycle: ");
4130 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4131 dump_printf (MSG_NOTE, "\n");
4132 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4133 dump_printf (MSG_NOTE, "\n");
4136 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4140 /* 2. Create epilog code.
4141 The reduction epilog code operates across the elements of the vector
4142 of partial results computed by the vectorized loop.
4143 The reduction epilog code consists of:
4145 step 1: compute the scalar result in a vector (v_out2)
4146 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4147 step 3: adjust the scalar result (s_out3) if needed.
4149 Step 1 can be accomplished using one of the following three schemes:
4150 (scheme 1) using reduc_code, if available.
4151 (scheme 2) using whole-vector shifts, if available.
4152 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4153 combined.
4155 The overall epilog code looks like this:
4157 s_out0 = phi <s_loop> # original EXIT_PHI
4158 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4159 v_out2 = reduce <v_out1> # step 1
4160 s_out3 = extract_field <v_out2, 0> # step 2
4161 s_out4 = adjust_result <s_out3> # step 3
4163 (step 3 is optional, and steps 1 and 2 may be combined).
4164 Lastly, the uses of s_out0 are replaced by s_out4. */
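/* For instance (a made-up 4-element integer PLUS reduction, illustrating
   scheme 2): starting from v = {a0, a1, a2, a3},
     shift by 2:  v' = {a2, a3, 0, 0},      v = v + v' = {a0+a2, a1+a3, _, _}
     shift by 1:  v' = {a1+a3, _, _, 0},    v = v + v' = {a0+a1+a2+a3, _, _, _}
   and the scalar result is then extracted from element 0; the '_' lanes
   hold don't-care values.  */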
4167 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4168 v_out1 = phi <VECT_DEF>
4169 Store them in NEW_PHIS. */
4171 exit_bb = single_exit (loop)->dest;
4172 prev_phi_info = NULL;
4173 new_phis.create (vect_defs.length ());
4174 FOR_EACH_VEC_ELT (vect_defs, i, def)
4176 for (j = 0; j < ncopies; j++)
4178 tree new_def = copy_ssa_name (def);
4179 phi = create_phi_node (new_def, exit_bb);
4180 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
4181 if (j == 0)
4182 new_phis.quick_push (phi);
4183 else
4185 def = vect_get_vec_def_for_stmt_copy (dt, def);
4186 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4189 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4190 prev_phi_info = vinfo_for_stmt (phi);
4194 /* The epilogue is created for the outer-loop, i.e., for the loop being
4195 vectorized. Create exit phis for the outer loop. */
4196 if (double_reduc)
4198 loop = outer_loop;
4199 exit_bb = single_exit (loop)->dest;
4200 inner_phis.create (vect_defs.length ());
4201 FOR_EACH_VEC_ELT (new_phis, i, phi)
4203 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4204 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4205 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4206 PHI_RESULT (phi));
4207 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4208 loop_vinfo, NULL));
4209 inner_phis.quick_push (phi);
4210 new_phis[i] = outer_phi;
4211 prev_phi_info = vinfo_for_stmt (outer_phi);
4212 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4214 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4215 new_result = copy_ssa_name (PHI_RESULT (phi));
4216 outer_phi = create_phi_node (new_result, exit_bb);
4217 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4218 PHI_RESULT (phi));
4219 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4220 loop_vinfo, NULL));
4221 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4222 prev_phi_info = vinfo_for_stmt (outer_phi);
4227 exit_gsi = gsi_after_labels (exit_bb);
4229 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4230 (i.e. when reduc_code is not available) and in the final adjustment
4231 code (if needed). Also get the original scalar reduction variable as
4232 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4233 represents a reduction pattern), the tree-code and scalar-def are
4234 taken from the original stmt that the pattern-stmt (STMT) replaces.
4235 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4236 are taken from STMT. */
4238 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4239 if (!orig_stmt)
4241 /* Regular reduction */
4242 orig_stmt = stmt;
4244 else
4246 /* Reduction pattern */
4247 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4248 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4249 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4252 code = gimple_assign_rhs_code (orig_stmt);
4253 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4254 partial results are added and not subtracted. */
4255 if (code == MINUS_EXPR)
4256 code = PLUS_EXPR;
4258 scalar_dest = gimple_assign_lhs (orig_stmt);
4259 scalar_type = TREE_TYPE (scalar_dest);
4260 scalar_results.create (group_size);
4261 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4262 bitsize = TYPE_SIZE (scalar_type);
4264 /* In case this is a reduction in an inner-loop while vectorizing an outer
4265 loop - we don't need to extract a single scalar result at the end of the
4266 inner-loop (unless it is double reduction, i.e., the use of reduction is
4267 outside the outer-loop). The final vector of partial results will be used
4268 in the vectorized outer-loop, or reduced to a scalar result at the end of
4269 the outer-loop. */
4270 if (nested_in_vect_loop && !double_reduc)
4271 goto vect_finalize_reduction;
4273 /* SLP reduction without reduction chain, e.g.,
4274 # a1 = phi <a2, a0>
4275 # b1 = phi <b2, b0>
4276 a2 = operation (a1)
4277 b2 = operation (b1) */
4278 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4280 /* In case of reduction chain, e.g.,
4281 # a1 = phi <a3, a0>
4282 a2 = operation (a1)
4283 a3 = operation (a2),
4285 we may end up with more than one vector result. Here we reduce them to
4286 one vector. */
4287 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4289 tree first_vect = PHI_RESULT (new_phis[0]);
4290 tree tmp;
4291 gassign *new_vec_stmt = NULL;
4293 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4294 for (k = 1; k < new_phis.length (); k++)
4296 gimple *next_phi = new_phis[k];
4297 tree second_vect = PHI_RESULT (next_phi);
4299 tmp = build2 (code, vectype, first_vect, second_vect);
4300 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4301 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4302 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4303 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4306 new_phi_result = first_vect;
4307 if (new_vec_stmt)
4309 new_phis.truncate (0);
4310 new_phis.safe_push (new_vec_stmt);
4313 else
4314 new_phi_result = PHI_RESULT (new_phis[0]);
4316 /* 2.3 Create the reduction code, using one of the three schemes described
4317 above. In SLP we simply need to extract all the elements from the
4318 vector (without reducing them), so we use scalar shifts. */
4319 if (reduc_code != ERROR_MARK && !slp_reduc)
4321 tree tmp;
4322 tree vec_elem_type;
4324 /*** Case 1: Create:
4325 v_out2 = reduc_expr <v_out1> */
4327 if (dump_enabled_p ())
4328 dump_printf_loc (MSG_NOTE, vect_location,
4329 "Reduce using direct vector reduction.\n");
4331 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4332 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4334 tree tmp_dest =
4335 vect_create_destination_var (scalar_dest, vec_elem_type);
4336 tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4337 epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4338 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4339 gimple_assign_set_lhs (epilog_stmt, new_temp);
4340 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4342 tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4344 else
4345 tmp = build1 (reduc_code, scalar_type, new_phi_result);
4346 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4347 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4348 gimple_assign_set_lhs (epilog_stmt, new_temp);
4349 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4350 scalar_results.safe_push (new_temp);
4352 else
4354 bool reduce_with_shift = have_whole_vector_shift (mode);
4355 int element_bitsize = tree_to_uhwi (bitsize);
4356 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4357 tree vec_temp;
4359 /* Regardless of whether we have a whole vector shift, if we're
4360 emulating the operation via tree-vect-generic, we don't want
4361 to use it. Only the first round of the reduction is likely
4362 to still be profitable via emulation. */
4363 /* ??? It might be better to emit a reduction tree code here, so that
4364 tree-vect-generic can expand the first round via bit tricks. */
4365 if (!VECTOR_MODE_P (mode))
4366 reduce_with_shift = false;
4367 else
4369 optab optab = optab_for_tree_code (code, vectype, optab_default);
4370 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4371 reduce_with_shift = false;
4374 if (reduce_with_shift && !slp_reduc)
4376 int nelements = vec_size_in_bits / element_bitsize;
4377 unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4379 int elt_offset;
4381 tree zero_vec = build_zero_cst (vectype);
4382 /*** Case 2: Create:
4383 for (offset = nelements/2; offset >= 1; offset/=2)
4385 Create: va' = vec_shift <va, offset>
4386 Create: va = vop <va, va'>
4387 } */
4389 tree rhs;
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_NOTE, vect_location,
4393 "Reduce using vector shifts\n");
4395 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4396 new_temp = new_phi_result;
4397 for (elt_offset = nelements / 2;
4398 elt_offset >= 1;
4399 elt_offset /= 2)
4401 calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4402 tree mask = vect_gen_perm_mask_any (vectype, sel);
4403 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4404 new_temp, zero_vec, mask);
4405 new_name = make_ssa_name (vec_dest, epilog_stmt);
4406 gimple_assign_set_lhs (epilog_stmt, new_name);
4407 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4409 epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4410 new_temp);
4411 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4412 gimple_assign_set_lhs (epilog_stmt, new_temp);
4413 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4416 /* 2.4 Extract the final scalar result. Create:
4417 s_out3 = extract_field <v_out2, bitpos> */
4419 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_NOTE, vect_location,
4421 "extract scalar result\n");
4423 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4424 bitsize, bitsize_zero_node);
4425 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4426 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4427 gimple_assign_set_lhs (epilog_stmt, new_temp);
4428 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4429 scalar_results.safe_push (new_temp);
4431 else
4433 /*** Case 3: Create:
4434 s = extract_field <v_out2, 0>
4435 for (offset = element_size;
4436 offset < vector_size;
4437 offset += element_size;)
4439 Create: s' = extract_field <v_out2, offset>
4440 Create: s = op <s, s'> // For non SLP cases
4441 } */
4443 if (dump_enabled_p ())
4444 dump_printf_loc (MSG_NOTE, vect_location,
4445 "Reduce using scalar code.\n");
4447 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4448 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4450 int bit_offset;
4451 if (gimple_code (new_phi) == GIMPLE_PHI)
4452 vec_temp = PHI_RESULT (new_phi);
4453 else
4454 vec_temp = gimple_assign_lhs (new_phi);
4455 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4456 bitsize_zero_node);
4457 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4458 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4459 gimple_assign_set_lhs (epilog_stmt, new_temp);
4460 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4462 /* In SLP we don't need to apply the reduction operation, so we just
4463 collect s' values in SCALAR_RESULTS. */
4464 if (slp_reduc)
4465 scalar_results.safe_push (new_temp);
4467 for (bit_offset = element_bitsize;
4468 bit_offset < vec_size_in_bits;
4469 bit_offset += element_bitsize)
4471 tree bitpos = bitsize_int (bit_offset);
4472 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4473 bitsize, bitpos);
4475 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4476 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4477 gimple_assign_set_lhs (epilog_stmt, new_name);
4478 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4480 if (slp_reduc)
4482 /* In SLP we don't need to apply the reduction operation, so
4483 we just collect s' values in SCALAR_RESULTS. */
4484 new_temp = new_name;
4485 scalar_results.safe_push (new_name);
4487 else
4489 epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4490 new_name, new_temp);
4491 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4492 gimple_assign_set_lhs (epilog_stmt, new_temp);
4493 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4498 /* The only case where we need to reduce scalar results in SLP is
4499 unrolling. If the size of SCALAR_RESULTS is greater than
4500 GROUP_SIZE, we reduce them by combining elements modulo
4501 GROUP_SIZE. */
4502 if (slp_reduc)
4504 tree res, first_res, new_res;
4505 gimple *new_stmt;
4507 /* Reduce multiple scalar results in case of SLP unrolling. */
4508 for (j = group_size; scalar_results.iterate (j, &res);
4509 j++)
4511 first_res = scalar_results[j % group_size];
4512 new_stmt = gimple_build_assign (new_scalar_dest, code,
4513 first_res, res);
4514 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4515 gimple_assign_set_lhs (new_stmt, new_res);
4516 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4517 scalar_results[j % group_size] = new_res;
4520 else
4521 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4522 scalar_results.safe_push (new_temp);
4526 vect_finalize_reduction:
4528 if (double_reduc)
4529 loop = loop->inner;
4531 /* 2.5 Adjust the final result by the initial value of the reduction
4532 variable. (When such adjustment is not needed, then
4533 'adjustment_def' is zero). For example, if code is PLUS we create:
4534 new_temp = loop_exit_def + adjustment_def */
4536 if (adjustment_def)
4538 gcc_assert (!slp_reduc);
4539 if (nested_in_vect_loop)
4541 new_phi = new_phis[0];
4542 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4543 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4544 new_dest = vect_create_destination_var (scalar_dest, vectype);
4546 else
4548 new_temp = scalar_results[0];
4549 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4550 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4551 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4554 epilog_stmt = gimple_build_assign (new_dest, expr);
4555 new_temp = make_ssa_name (new_dest, epilog_stmt);
4556 gimple_assign_set_lhs (epilog_stmt, new_temp);
4557 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4558 if (nested_in_vect_loop)
4560 set_vinfo_for_stmt (epilog_stmt,
4561 new_stmt_vec_info (epilog_stmt, loop_vinfo,
4562 NULL));
4563 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4564 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4566 if (!double_reduc)
4567 scalar_results.quick_push (new_temp);
4568 else
4569 scalar_results[0] = new_temp;
4571 else
4572 scalar_results[0] = new_temp;
4574 new_phis[0] = epilog_stmt;
4577 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4578 phis with new adjusted scalar results, i.e., replace use <s_out0>
4579 with use <s_out4>.
4581 Transform:
4582 loop_exit:
4583 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4584 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4585 v_out2 = reduce <v_out1>
4586 s_out3 = extract_field <v_out2, 0>
4587 s_out4 = adjust_result <s_out3>
4588 use <s_out0>
4589 use <s_out0>
4591 into:
4593 loop_exit:
4594 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4595 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4596 v_out2 = reduce <v_out1>
4597 s_out3 = extract_field <v_out2, 0>
4598 s_out4 = adjust_result <s_out3>
4599 use <s_out4>
4600 use <s_out4> */
4603 /* In SLP reduction chain we reduce vector results into one vector if
4604 necessary, hence we set here GROUP_SIZE to 1. SCALAR_DEST is the LHS of
4605 the last stmt in the reduction chain, since we are looking for the loop
4606 exit phi node. */
4607 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4609 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4610 /* Handle reduction patterns. */
4611 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4612 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4614 scalar_dest = gimple_assign_lhs (dest_stmt);
4615 group_size = 1;
4618 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4619 case GROUP_SIZE is greater than the vectorization factor). Therefore, we
4620 need to match SCALAR_RESULTS with corresponding statements. The first
4621 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4622 the first vector stmt, etc.
4623 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
4624 if (group_size > new_phis.length ())
4626 ratio = group_size / new_phis.length ();
4627 gcc_assert (!(group_size % new_phis.length ()));
4629 else
4630 ratio = 1;
4632 for (k = 0; k < group_size; k++)
4634 if (k % ratio == 0)
4636 epilog_stmt = new_phis[k / ratio];
4637 reduction_phi = reduction_phis[k / ratio];
4638 if (double_reduc)
4639 inner_phi = inner_phis[k / ratio];
4642 if (slp_reduc)
4644 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4646 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4647 /* SLP statements can't participate in patterns. */
4648 gcc_assert (!orig_stmt);
4649 scalar_dest = gimple_assign_lhs (current_stmt);
4652 phis.create (3);
4653 /* Find the loop-closed-use at the loop exit of the original scalar
4654 result. (The reduction result is expected to have two immediate uses -
4655 one at the latch block, and one at the loop exit). */
4656 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4657 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4658 && !is_gimple_debug (USE_STMT (use_p)))
4659 phis.safe_push (USE_STMT (use_p));
4661 /* While we expect to have found an exit_phi because of loop-closed-ssa
4662 form we can end up without one if the scalar cycle is dead. */
4664 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4666 if (outer_loop)
4668 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4669 gphi *vect_phi;
4671 /* FORNOW. Currently not supporting the case that an inner-loop
4672 reduction is not used in the outer-loop (but only outside the
4673 outer-loop), unless it is double reduction. */
4674 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4675 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4676 || double_reduc);
4678 if (double_reduc)
4679 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
4680 else
4681 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4682 if (!double_reduc
4683 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4684 != vect_double_reduction_def)
4685 continue;
4687 /* Handle double reduction:
4689 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4690 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4691 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4692 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4694 At that point the regular reduction (stmt2 and stmt3) is
4695 already vectorized, as well as the exit phi node, stmt4.
4696 Here we vectorize the phi node of double reduction, stmt1, and
4697 update all relevant statements. */
4699 /* Go through all the uses of s2 to find double reduction phi
4700 node, i.e., stmt1 above. */
4701 orig_name = PHI_RESULT (exit_phi);
4702 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4704 stmt_vec_info use_stmt_vinfo;
4705 stmt_vec_info new_phi_vinfo;
4706 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4707 basic_block bb = gimple_bb (use_stmt);
4708 gimple *use;
4710 /* Check that USE_STMT is really double reduction phi
4711 node. */
4712 if (gimple_code (use_stmt) != GIMPLE_PHI
4713 || gimple_phi_num_args (use_stmt) != 2
4714 || bb->loop_father != outer_loop)
4715 continue;
4716 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4717 if (!use_stmt_vinfo
4718 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4719 != vect_double_reduction_def)
4720 continue;
4722 /* Create vector phi node for double reduction:
4723 vs1 = phi <vs0, vs2>
4724 vs1 was created previously in this function by a call to
4725 vect_get_vec_def_for_operand and is stored in
4726 vec_initial_def;
4727 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4728 vs0 is created here. */
4730 /* Create vector phi node. */
4731 vect_phi = create_phi_node (vec_initial_def, bb);
4732 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4733 loop_vec_info_for_loop (outer_loop), NULL);
4734 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4736 /* Create vs0 - initial def of the double reduction phi. */
4737 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4738 loop_preheader_edge (outer_loop));
4739 init_def = get_initial_def_for_reduction (stmt,
4740 preheader_arg, NULL);
4741 vect_phi_init = vect_init_vector (use_stmt, init_def,
4742 vectype, NULL);
4744 /* Update phi node arguments with vs0 and vs2. */
4745 add_phi_arg (vect_phi, vect_phi_init,
4746 loop_preheader_edge (outer_loop),
4747 UNKNOWN_LOCATION);
4748 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4749 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4750 if (dump_enabled_p ())
4752 dump_printf_loc (MSG_NOTE, vect_location,
4753 "created double reduction phi node: ");
4754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4755 dump_printf (MSG_NOTE, "\n");
4758 vect_phi_res = PHI_RESULT (vect_phi);
4760 /* Replace the use, i.e., set the correct vs1 in the regular
4761 reduction phi node. FORNOW, NCOPIES is always 1, so the
4762 loop is redundant. */
4763 use = reduction_phi;
4764 for (j = 0; j < ncopies; j++)
4766 edge pr_edge = loop_preheader_edge (loop);
4767 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4768 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4774 phis.release ();
4775 if (nested_in_vect_loop)
4777 if (double_reduc)
4778 loop = outer_loop;
4779 else
4780 continue;
4783 phis.create (3);
4784 /* Find the loop-closed-use at the loop exit of the original scalar
4785 result. (The reduction result is expected to have two immediate uses,
4786 one at the latch block, and one at the loop exit). For double
4787 reductions we are looking for exit phis of the outer loop. */
4788 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4790 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4792 if (!is_gimple_debug (USE_STMT (use_p)))
4793 phis.safe_push (USE_STMT (use_p));
4795 else
4797 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4799 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4801 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4803 if (!flow_bb_inside_loop_p (loop,
4804 gimple_bb (USE_STMT (phi_use_p)))
4805 && !is_gimple_debug (USE_STMT (phi_use_p)))
4806 phis.safe_push (USE_STMT (phi_use_p));
4812 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4814 /* Replace the uses: */
4815 orig_name = PHI_RESULT (exit_phi);
4816 scalar_result = scalar_results[k];
4817 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4818 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4819 SET_USE (use_p, scalar_result);
4822 phis.release ();
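/* A minimal illustrative sketch (made up for exposition, not used by the
   vectorizer) of scheme 3 above - "reduce using scalar code" - for a PLUS
   reduction over a 4-element vector of ints: each array element stands in
   for one BIT_FIELD_REF extraction and is combined with the running
   scalar result.  */

static int
reduc_epilog_scalar_code_example (const int partial[4])
{
  int s = partial[0];                  /* s = extract_field <v_out2, 0>  */
  for (int off = 1; off < 4; off++)    /* Remaining elements.            */
    s = s + partial[off];              /* s = op <s, s'>                 */
  return s;
}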
4827 /* Function vectorizable_reduction.
4829 Check if STMT performs a reduction operation that can be vectorized.
4830 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4831 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4832 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4834 This function also handles reduction idioms (patterns) that have been
4835 recognized in advance during vect_pattern_recog. In this case, STMT may be
4836 of this form:
4837 X = pattern_expr (arg0, arg1, ..., X)
4838 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4839 sequence that had been detected and replaced by the pattern-stmt (STMT).
4841 In some cases of reduction patterns, the type of the reduction variable X is
4842 different than the type of the other arguments of STMT.
4843 In such cases, the vectype that is used when transforming STMT into a vector
4844 stmt is different than the vectype that is used to determine the
4845 vectorization factor, because it consists of a different number of elements
4846 than the actual number of elements that are being operated upon in parallel.
4848 For example, consider an accumulation of shorts into an int accumulator.
4849 On some targets it's possible to vectorize this pattern operating on 8
4850 shorts at a time (hence, the vectype for purposes of determining the
4851 vectorization factor should be V8HI); on the other hand, the vectype that
4852 is used to create the vector form is actually V4SI (the type of the result).
4854 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4855 indicates what is the actual level of parallelism (V8HI in the example), so
4856 that the right vectorization factor would be derived. This vectype
4857 corresponds to the type of arguments to the reduction stmt, and should *NOT*
4858 be used to create the vectorized stmt. The right vectype for the vectorized
4859 stmt is obtained from the type of the result X:
4860 get_vectype_for_scalar_type (TREE_TYPE (X))
4862 This means that, contrary to "regular" reductions (or "regular" stmts in
4863 general), the following equation:
4864 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4865 does *NOT* necessarily hold for reduction patterns. */
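/* As a concrete (made-up) instance of the pattern case above, consider

     short a[N]; int acc = 0;
     for (i = 0; i < N; i++)
       acc = acc + a[i];

   on a target with V8HI/V4SI vectors: the vectorization factor is derived
   from V8HI (8 shorts are processed per vector iteration), while the
   vectorized statement for the recognized pattern produces a V4SI value,
   i.e. STMT_VINFO_VECTYPE (V8HI) differs from
   get_vectype_for_scalar_type (TREE_TYPE (X)) (V4SI).  */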
4867 bool
4868 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
4869 gimple **vec_stmt, slp_tree slp_node)
4871 tree vec_dest;
4872 tree scalar_dest;
4873 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4874 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4875 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4876 tree vectype_in = NULL_TREE;
4877 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4878 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4879 enum tree_code code, orig_code, epilog_reduc_code;
4880 machine_mode vec_mode;
4881 int op_type;
4882 optab optab, reduc_optab;
4883 tree new_temp = NULL_TREE;
4884 tree def;
4885 gimple *def_stmt;
4886 enum vect_def_type dt;
4887 gphi *new_phi = NULL;
4888 tree scalar_type;
4889 bool is_simple_use;
4890 gimple *orig_stmt;
4891 stmt_vec_info orig_stmt_info;
4892 tree expr = NULL_TREE;
4893 int i;
4894 int ncopies;
4895 int epilog_copies;
4896 stmt_vec_info prev_stmt_info, prev_phi_info;
4897 bool single_defuse_cycle = false;
4898 tree reduc_def = NULL_TREE;
4899 gimple *new_stmt = NULL;
4900 int j;
4901 tree ops[3];
4902 bool nested_cycle = false, found_nested_cycle_def = false;
4903 gimple *reduc_def_stmt = NULL;
4904 bool double_reduc = false, dummy;
4905 basic_block def_bb;
4906 struct loop * def_stmt_loop, *outer_loop = NULL;
4907 tree def_arg;
4908 gimple *def_arg_stmt;
4909 auto_vec<tree> vec_oprnds0;
4910 auto_vec<tree> vec_oprnds1;
4911 auto_vec<tree> vect_defs;
4912 auto_vec<gimple *> phis;
4913 int vec_num;
4914 tree def0, def1, tem, op0, op1 = NULL_TREE;
4915 bool first_p = true;
4917 /* In case of reduction chain we switch to the first stmt in the chain, but
4918 we don't update STMT_INFO, since only the last stmt is marked as reduction
4919 and has reduction properties. */
4920 if (GROUP_FIRST_ELEMENT (stmt_info)
4921 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
4923 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4924 first_p = false;
4927 if (nested_in_vect_loop_p (loop, stmt))
4929 outer_loop = loop;
4930 loop = loop->inner;
4931 nested_cycle = true;
4934 /* 1. Is vectorizable reduction? */
4935 /* Not supportable if the reduction variable is used in the loop, unless
4936 it's a reduction chain. */
4937 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4938 && !GROUP_FIRST_ELEMENT (stmt_info))
4939 return false;
4941 /* Reductions that are not used even in an enclosing outer-loop
4942 are expected to be "live" (used out of the loop). */
4943 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4944 && !STMT_VINFO_LIVE_P (stmt_info))
4945 return false;
4947 /* Make sure it was already recognized as a reduction computation. */
4948 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
4949 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
4950 return false;
4952 /* 2. Has this been recognized as a reduction pattern?
4954 Check if STMT represents a pattern that has been recognized
4955 in earlier analysis stages. For stmts that represent a pattern,
4956 the STMT_VINFO_RELATED_STMT field records the last stmt in
4957 the original sequence that constitutes the pattern. */
4959 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
4960 if (orig_stmt)
4962 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4963 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4964 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4967 /* 3. Check the operands of the operation. The first operands are defined
4968 inside the loop body. The last operand is the reduction variable,
4969 which is defined by the loop-header-phi. */
4971 gcc_assert (is_gimple_assign (stmt));
4973 /* Flatten RHS. */
4974 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4976 case GIMPLE_SINGLE_RHS:
4977 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4978 if (op_type == ternary_op)
4980 tree rhs = gimple_assign_rhs1 (stmt);
4981 ops[0] = TREE_OPERAND (rhs, 0);
4982 ops[1] = TREE_OPERAND (rhs, 1);
4983 ops[2] = TREE_OPERAND (rhs, 2);
4984 code = TREE_CODE (rhs);
4986 else
4987 return false;
4988 break;
4990 case GIMPLE_BINARY_RHS:
4991 code = gimple_assign_rhs_code (stmt);
4992 op_type = TREE_CODE_LENGTH (code);
4993 gcc_assert (op_type == binary_op);
4994 ops[0] = gimple_assign_rhs1 (stmt);
4995 ops[1] = gimple_assign_rhs2 (stmt);
4996 break;
4998 case GIMPLE_TERNARY_RHS:
4999 code = gimple_assign_rhs_code (stmt);
5000 op_type = TREE_CODE_LENGTH (code);
5001 gcc_assert (op_type == ternary_op);
5002 ops[0] = gimple_assign_rhs1 (stmt);
5003 ops[1] = gimple_assign_rhs2 (stmt);
5004 ops[2] = gimple_assign_rhs3 (stmt);
5005 break;
5007 case GIMPLE_UNARY_RHS:
5008 return false;
5010 default:
5011 gcc_unreachable ();
5013 /* The default is that the reduction variable is the last in statement. */
5014 int reduc_index = op_type - 1;
5016 if (code == COND_EXPR && slp_node)
5017 return false;
5019 scalar_dest = gimple_assign_lhs (stmt);
5020 scalar_type = TREE_TYPE (scalar_dest);
5021 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5022 && !SCALAR_FLOAT_TYPE_P (scalar_type))
5023 return false;
5025 /* Do not try to vectorize bit-precision reductions. */
5026 if ((TYPE_PRECISION (scalar_type)
5027 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5028 return false;
5030 /* All uses but the last are expected to be defined in the loop.
5031 The last use is the reduction variable. In case of nested cycle this
5032 assumption is not true: we use reduc_index to record the index of the
5033 reduction variable. */
5034 for (i = 0; i < op_type - 1; i++)
5036 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5037 if (i == 0 && code == COND_EXPR)
5038 continue;
5040 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5041 &def_stmt, &def, &dt, &tem);
5042 if (!vectype_in)
5043 vectype_in = tem;
5044 gcc_assert (is_simple_use);
5046 if (dt != vect_internal_def
5047 && dt != vect_external_def
5048 && dt != vect_constant_def
5049 && dt != vect_induction_def
5050 && !(dt == vect_nested_cycle && nested_cycle))
5051 return false;
5053 if (dt == vect_nested_cycle)
5055 found_nested_cycle_def = true;
5056 reduc_def_stmt = def_stmt;
5057 reduc_index = i;
5061 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5062 &def_stmt, &def, &dt, &tem);
5063 if (!vectype_in)
5064 vectype_in = tem;
5065 gcc_assert (is_simple_use);
5066 if (!found_nested_cycle_def)
5067 reduc_def_stmt = def_stmt;
5069 if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5070 return false;
5072 if (!(dt == vect_reduction_def
5073 || dt == vect_nested_cycle
5074 || ((dt == vect_internal_def || dt == vect_external_def
5075 || dt == vect_constant_def || dt == vect_induction_def)
5076 && nested_cycle && found_nested_cycle_def)))
5078 /* For pattern recognized stmts, orig_stmt might be a reduction,
5079 but some helper statements for the pattern might not, or
5080 might be COND_EXPRs with reduction uses in the condition. */
5081 gcc_assert (orig_stmt);
5082 return false;
5085 gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5086 !nested_cycle, &dummy, false);
5087 if (orig_stmt)
5088 gcc_assert (tmp == orig_stmt
5089 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5090 else
5091 /* We changed STMT to be the first stmt in reduction chain, hence we
5092 check that in this case the first element in the chain is STMT. */
5093 gcc_assert (stmt == tmp
5094 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5096 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5097 return false;
5099 if (slp_node || PURE_SLP_STMT (stmt_info))
5100 ncopies = 1;
5101 else
5102 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5103 / TYPE_VECTOR_SUBPARTS (vectype_in));
5105 gcc_assert (ncopies >= 1);
5107 vec_mode = TYPE_MODE (vectype_in);
5109 if (code == COND_EXPR)
5111 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
5113 if (dump_enabled_p ())
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "unsupported condition in reduction\n");
5117 return false;
5120 else
5122 /* 4. Supportable by target? */
5124 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5125 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5127 /* Shifts and rotates are only supported by vectorizable_shift,
5128 not vectorizable_reduction. */
5129 if (dump_enabled_p ())
5130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5131 "unsupported shift or rotation.\n");
5132 return false;
5135 /* 4.1. check support for the operation in the loop */
5136 optab = optab_for_tree_code (code, vectype_in, optab_default);
5137 if (!optab)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "no optab.\n");
5143 return false;
5146 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5148 if (dump_enabled_p ())
5149 dump_printf (MSG_NOTE, "op not supported by target.\n");
5151 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5152 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5153 < vect_min_worthwhile_factor (code))
5154 return false;
5156 if (dump_enabled_p ())
5157 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5160 /* Worthwhile without SIMD support? */
5161 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5162 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5163 < vect_min_worthwhile_factor (code))
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5167 "not worthwhile without SIMD support.\n");
5169 return false;
5173 /* 4.2. Check support for the epilog operation.
5175 If STMT represents a reduction pattern, then the type of the
5176 reduction variable may be different than the type of the rest
5177 of the arguments. For example, consider the case of accumulation
5178 of shorts into an int accumulator. The original code:
5179 S1: int_a = (int) short_a;
5180 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5182 was replaced with:
5183 STMT: int_acc = widen_sum <short_a, int_acc>
5185 This means that:
5186 1. The tree-code that is used to create the vector operation in the
5187 epilog code (that reduces the partial results) is not the
5188 tree-code of STMT, but is rather the tree-code of the original
5189 stmt from the pattern that STMT is replacing. I.e, in the example
5190 above we want to use 'widen_sum' in the loop, but 'plus' in the
5191 epilog.
5192 2. The type (mode) we use to check available target support
5193 for the vector operation to be created in the *epilog*, is
5194 determined by the type of the reduction variable (in the example
5195 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
5196 However the type (mode) we use to check available target support
5197 for the vector operation to be created *inside the loop*, is
5198 determined by the type of the other arguments to STMT (in the
5199 example we'd check this: optab_handler (widen_sum_optab,
5200 vect_short_mode)).
5202 This is contrary to "regular" reductions, in which the types of all
5203 the arguments are the same as the type of the reduction variable.
5204 For "regular" reductions we can therefore use the same vector type
5205 (and also the same tree-code) when generating the epilog code and
5206 when generating the code inside the loop. */
5208 if (orig_stmt)
5210 /* This is a reduction pattern: get the vectype from the type of the
5211 reduction variable, and get the tree-code from orig_stmt. */
5212 orig_code = gimple_assign_rhs_code (orig_stmt);
5213 gcc_assert (vectype_out);
5214 vec_mode = TYPE_MODE (vectype_out);
5216 else
5218 /* Regular reduction: the same vectype and tree-code used for the
5219 vector code inside the loop can also be used for the epilog code. */
5220 orig_code = code;
5223 if (nested_cycle)
5225 def_bb = gimple_bb (reduc_def_stmt);
5226 def_stmt_loop = def_bb->loop_father;
5227 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5228 loop_preheader_edge (def_stmt_loop));
5229 if (TREE_CODE (def_arg) == SSA_NAME
5230 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5231 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5232 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5233 && vinfo_for_stmt (def_arg_stmt)
5234 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5235 == vect_double_reduction_def)
5236 double_reduc = true;
5239 epilog_reduc_code = ERROR_MARK;
5240 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5242 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5243 optab_default);
5244 if (!reduc_optab)
5246 if (dump_enabled_p ())
5247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5248 "no optab for reduction.\n");
5250 epilog_reduc_code = ERROR_MARK;
5252 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5254 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5255 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5257 if (dump_enabled_p ())
5258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5259 "reduc op not supported by target.\n");
5261 epilog_reduc_code = ERROR_MARK;
5265 else
5267 if (!nested_cycle || double_reduc)
5269 if (dump_enabled_p ())
5270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5271 "no reduc code for scalar code.\n");
5273 return false;
5277 if (double_reduc && ncopies > 1)
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5281 "multiple types in double reduction\n");
5283 return false;
5286 /* In case of widening multiplication by a constant, we update the type
5287 of the constant to be the type of the other operand. We check that the
5288 constant fits the type in the pattern recognition pass. */
5289 if (code == DOT_PROD_EXPR
5290 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5292 if (TREE_CODE (ops[0]) == INTEGER_CST)
5293 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5294 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5295 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5296 else
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5300 "invalid types in dot-prod\n");
5302 return false;
5306 if (!vec_stmt) /* transformation not required. */
5308 if (first_p
5309 && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5310 reduc_index))
5311 return false;
5312 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5313 return true;
5316 /** Transform. **/
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5321 /* FORNOW: Multiple types are not supported for condition. */
5322 if (code == COND_EXPR)
5323 gcc_assert (ncopies == 1);
5325 /* Create the destination vector */
5326 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5328 /* In case the vectorization factor (VF) is bigger than the number
5329 of elements that we can fit in a vectype (nunits), we have to generate
5330 more than one vector stmt - i.e - we need to "unroll" the
5331 vector stmt by a factor VF/nunits. For more details see documentation
5332 in vectorizable_operation. */
5334 /* If the reduction is used in an outer loop we need to generate
5335 VF intermediate results, like so (e.g. for ncopies=2):
5336 r0 = phi (init, r0)
5337 r1 = phi (init, r1)
5338 r0 = x0 + r0;
5339 r1 = x1 + r1;
5340 (i.e. we generate VF results in 2 registers).
5341 In this case we have a separate def-use cycle for each copy, and therefore
5342 for each copy we get the vector def for the reduction variable from the
5343 respective phi node created for this copy.
5345 Otherwise (the reduction is unused in the loop nest), we can combine
5346 together intermediate results, like so (e.g. for ncopies=2):
5347 r = phi (init, r)
5348 r = x0 + r;
5349 r = x1 + r;
5350 (i.e. we generate VF/2 results in a single register).
5351 In this case for each copy we get the vector def for the reduction variable
5352 from the vectorized reduction operation generated in the previous iteration.
5355 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5357 single_defuse_cycle = true;
5358 epilog_copies = 1;
5360 else
5361 epilog_copies = ncopies;
5363 prev_stmt_info = NULL;
5364 prev_phi_info = NULL;
5365 if (slp_node)
5366 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5367 else
5369 vec_num = 1;
5370 vec_oprnds0.create (1);
5371 if (op_type == ternary_op)
5372 vec_oprnds1.create (1);
5375 phis.create (vec_num);
5376 vect_defs.create (vec_num);
5377 if (!slp_node)
5378 vect_defs.quick_push (NULL_TREE);
5380 for (j = 0; j < ncopies; j++)
5382 if (j == 0 || !single_defuse_cycle)
5384 for (i = 0; i < vec_num; i++)
5386 /* Create the reduction-phi that defines the reduction
5387 operand. */
5388 new_phi = create_phi_node (vec_dest, loop->header);
5389 set_vinfo_for_stmt (new_phi,
5390 new_stmt_vec_info (new_phi, loop_vinfo,
5391 NULL));
5392 if (j == 0 || slp_node)
5393 phis.quick_push (new_phi);
5397 if (code == COND_EXPR)
5399 gcc_assert (!slp_node);
5400 vectorizable_condition (stmt, gsi, vec_stmt,
5401 PHI_RESULT (phis[0]),
5402 reduc_index, NULL);
5403 /* Multiple types are not supported for condition. */
5404 break;
5407 /* Handle uses. */
5408 if (j == 0)
5410 op0 = ops[!reduc_index];
5411 if (op_type == ternary_op)
5413 if (reduc_index == 0)
5414 op1 = ops[2];
5415 else
5416 op1 = ops[1];
5419 if (slp_node)
5420 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5421 slp_node, -1);
5422 else
5424 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5425 stmt, NULL);
5426 vec_oprnds0.quick_push (loop_vec_def0);
5427 if (op_type == ternary_op)
5429 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5430 NULL);
5431 vec_oprnds1.quick_push (loop_vec_def1);
5435 else
5437 if (!slp_node)
5439 enum vect_def_type dt;
5440 gimple *dummy_stmt;
5441 tree dummy;
5443 vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5444 &dummy_stmt, &dummy, &dt);
5445 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5446 loop_vec_def0);
5447 vec_oprnds0[0] = loop_vec_def0;
5448 if (op_type == ternary_op)
5450 vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5451 &dummy, &dt);
5452 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5453 loop_vec_def1);
5454 vec_oprnds1[0] = loop_vec_def1;
5458 if (single_defuse_cycle)
5459 reduc_def = gimple_assign_lhs (new_stmt);
5461 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5464 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5466 if (slp_node)
5467 reduc_def = PHI_RESULT (phis[i]);
5468 else
5470 if (!single_defuse_cycle || j == 0)
5471 reduc_def = PHI_RESULT (new_phi);
5474 def1 = ((op_type == ternary_op)
5475 ? vec_oprnds1[i] : NULL);
5476 if (op_type == binary_op)
5478 if (reduc_index == 0)
5479 expr = build2 (code, vectype_out, reduc_def, def0);
5480 else
5481 expr = build2 (code, vectype_out, def0, reduc_def);
5483 else
5485 if (reduc_index == 0)
5486 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5487 else
5489 if (reduc_index == 1)
5490 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5491 else
5492 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5496 new_stmt = gimple_build_assign (vec_dest, expr);
5497 new_temp = make_ssa_name (vec_dest, new_stmt);
5498 gimple_assign_set_lhs (new_stmt, new_temp);
5499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5501 if (slp_node)
5503 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5504 vect_defs.quick_push (new_temp);
5506 else
5507 vect_defs[0] = new_temp;
5510 if (slp_node)
5511 continue;
5513 if (j == 0)
5514 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5515 else
5516 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5518 prev_stmt_info = vinfo_for_stmt (new_stmt);
5519 prev_phi_info = vinfo_for_stmt (new_phi);
5522 /* Finalize the reduction-phi (set its arguments) and create the
5523 epilog reduction code. */
5524 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5526 new_temp = gimple_assign_lhs (*vec_stmt);
5527 vect_defs[0] = new_temp;
5530 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5531 epilog_reduc_code, phis, reduc_index,
5532 double_reduc, slp_node);
5534 return true;
5537 /* Function vect_min_worthwhile_factor.
5539 For a loop where we could vectorize the operation indicated by CODE,
5540 return the minimum vectorization factor that makes it worthwhile
5541 to use generic vectors. */
int
5543 vect_min_worthwhile_factor (enum tree_code code)
5545 switch (code)
5547 case PLUS_EXPR:
5548 case MINUS_EXPR:
5549 case NEGATE_EXPR:
5550 return 4;
5552 case BIT_AND_EXPR:
5553 case BIT_IOR_EXPR:
5554 case BIT_XOR_EXPR:
5555 case BIT_NOT_EXPR:
5556 return 2;
5558 default:
5559 return INT_MAX;
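/* Illustrative usage sketch (an assumption about a typical caller, not
   code taken from this file): when there is no target support for the
   operation, a caller could reject the generic-vector fallback with

     if (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
       return false;

   e.g. PLUS_EXPR requires a vectorization factor of at least 4, while
   BIT_AND_EXPR only requires 2.  */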
5564 /* Function vectorizable_induction
5566 Check if PHI performs an induction computation that can be vectorized.
5567 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5568 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5569 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5571 bool
5572 vectorizable_induction (gimple *phi,
5573 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5574 gimple **vec_stmt)
5576 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5577 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5578 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5579 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5580 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5581 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5582 tree vec_def;
5584 gcc_assert (ncopies >= 1);
5585 /* FORNOW. These restrictions should be relaxed. */
5586 if (nested_in_vect_loop_p (loop, phi))
5588 imm_use_iterator imm_iter;
5589 use_operand_p use_p;
5590 gimple *exit_phi;
5591 edge latch_e;
5592 tree loop_arg;
5594 if (ncopies > 1)
5596 if (dump_enabled_p ())
5597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5598 "multiple types in nested loop.\n");
5599 return false;
5602 exit_phi = NULL;
5603 latch_e = loop_latch_edge (loop->inner);
5604 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5605 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5607 gimple *use_stmt = USE_STMT (use_p);
5608 if (is_gimple_debug (use_stmt))
5609 continue;
5611 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
5613 exit_phi = use_stmt;
5614 break;
5617 if (exit_phi)
5619 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5620 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5621 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5623 if (dump_enabled_p ())
5624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5625 "inner-loop induction only used outside "
5626 "of the outer vectorized loop.\n");
5627 return false;
5632 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5633 return false;
5635 /* FORNOW: SLP not supported. */
5636 if (STMT_SLP_TYPE (stmt_info))
5637 return false;
5639 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5641 if (gimple_code (phi) != GIMPLE_PHI)
5642 return false;
5644 if (!vec_stmt) /* transformation not required. */
5646 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5647 if (dump_enabled_p ())
5648 dump_printf_loc (MSG_NOTE, vect_location,
5649 "=== vectorizable_induction ===\n");
5650 vect_model_induction_cost (stmt_info, ncopies);
5651 return true;
5654 /** Transform. **/
5656 if (dump_enabled_p ())
5657 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5659 vec_def = get_initial_def_for_induction (phi);
5660 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5661 return true;
5664 /* Function vectorizable_live_operation.
5666 STMT computes a value that is used outside the loop. Check if
5667 it can be supported. */
5669 bool
5670 vectorizable_live_operation (gimple *stmt,
5671 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5672 gimple **vec_stmt)
5674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5675 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5676 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5677 int i;
5678 int op_type;
5679 tree op;
5680 tree def;
5681 gimple *def_stmt;
5682 enum vect_def_type dt;
5683 enum tree_code code;
5684 enum gimple_rhs_class rhs_class;
5686 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5688 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5689 return false;
5691 if (!is_gimple_assign (stmt))
5693 if (gimple_call_internal_p (stmt)
5694 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5695 && gimple_call_lhs (stmt)
5696 && loop->simduid
5697 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5698 && loop->simduid
5699 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5701 edge e = single_exit (loop);
5702 basic_block merge_bb = e->dest;
5703 imm_use_iterator imm_iter;
5704 use_operand_p use_p;
5705 tree lhs = gimple_call_lhs (stmt);
5707 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5709 gimple *use_stmt = USE_STMT (use_p);
5710 if (gimple_code (use_stmt) == GIMPLE_PHI
5711 && gimple_bb (use_stmt) == merge_bb)
5713 if (vec_stmt)
5715 tree vfm1
5716 = build_int_cst (unsigned_type_node,
5717 loop_vinfo->vectorization_factor - 1);
5718 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5720 return true;
5725 return false;
5728 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5729 return false;
5731 /* FORNOW. CHECKME. */
5732 if (nested_in_vect_loop_p (loop, stmt))
5733 return false;
5735 code = gimple_assign_rhs_code (stmt);
5736 op_type = TREE_CODE_LENGTH (code);
5737 rhs_class = get_gimple_rhs_class (code);
5738 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5739 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5741 /* FORNOW: support only if all uses are invariant. This means
5742 that the scalar operations can remain in place, unvectorized.
5743 The original last scalar value that they compute will be used. */
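/* Illustrative note (added example, not in the original sources): a live
   statement such as  last = x + 5  where x is defined before the loop has
   only external/constant operands, so it passes the checks below and is
   simply left in place; its final scalar value is what the code after the
   loop consumes.  */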
5745 for (i = 0; i < op_type; i++)
5747 if (rhs_class == GIMPLE_SINGLE_RHS)
5748 op = TREE_OPERAND (gimple_op (stmt, 1), i);
5749 else
5750 op = gimple_op (stmt, i + 1);
5751 if (op
5752 && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5753 &dt))
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757 "use not simple.\n");
5758 return false;
5761 if (dt != vect_external_def && dt != vect_constant_def)
5762 return false;
5765 /* No transformation is required for the cases we currently support. */
5766 return true;
5769 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
5771 static void
5772 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt)
5774 ssa_op_iter op_iter;
5775 imm_use_iterator imm_iter;
5776 def_operand_p def_p;
5777 gimple *ustmt;
5779 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5781 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5783 basic_block bb;
5785 if (!is_gimple_debug (ustmt))
5786 continue;
5788 bb = gimple_bb (ustmt);
5790 if (!flow_bb_inside_loop_p (loop, bb))
5792 if (gimple_debug_bind_p (ustmt))
5794 if (dump_enabled_p ())
5795 dump_printf_loc (MSG_NOTE, vect_location,
5796 "killing debug use\n");
5798 gimple_debug_bind_reset_value (ustmt);
5799 update_stmt (ustmt);
5801 else
5802 gcc_unreachable ();
5809 /* This function builds ni_name = number of iterations. Statements
5810 are emitted on the loop preheader edge. */
5812 static tree
5813 vect_build_loop_niters (loop_vec_info loop_vinfo)
5815 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5816 if (TREE_CODE (ni) == INTEGER_CST)
5817 return ni;
5818 else
5820 tree ni_name, var;
5821 gimple_seq stmts = NULL;
5822 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5824 var = create_tmp_var (TREE_TYPE (ni), "niters");
5825 ni_name = force_gimple_operand (ni, &stmts, false, var);
5826 if (stmts)
5827 gsi_insert_seq_on_edge_immediate (pe, stmts);
5829 return ni_name;
5834 /* This function generates the following statements:
5836 ni_name = number of iterations the loop executes
5837 ratio = ni_name / vf
5838 ratio_mult_vf_name = ratio * vf
5840 and places them on the loop preheader edge. */
5842 static void
5843 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5844 tree ni_name,
5845 tree *ratio_mult_vf_name_ptr,
5846 tree *ratio_name_ptr)
5848 tree ni_minus_gap_name;
5849 tree var;
5850 tree ratio_name;
5851 tree ratio_mult_vf_name;
5852 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5853 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5854 tree log_vf;
5856 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
5858 /* If an epilogue loop is required because of data accesses with gaps, we
5859 subtract one iteration from the total number of iterations here for
5860 correct calculation of RATIO. */
5861 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5863 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5864 ni_name,
5865 build_one_cst (TREE_TYPE (ni_name)));
5866 if (!is_gimple_val (ni_minus_gap_name))
5868 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
5869 gimple *stmts = NULL;
5870 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
5871 true, var);
5872 gsi_insert_seq_on_edge_immediate (pe, stmts);
5875 else
5876 ni_minus_gap_name = ni_name;
5878 /* Create: ratio = ni >> log2(vf) */
5879 /* ??? As we have ni == number of latch executions + 1, ni could
5880 have overflowed to zero. So avoid computing ratio based on ni
5881 but compute it using the fact that we know ratio will be at least
5882 one, thus via (ni - vf) >> log2(vf) + 1. */
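/* Worked example (added illustration, not in the original sources):
   with vf == 4, log2(vf) == 2 and ni == 10, both forms agree:
   10 >> 2 == 2 and ((10 - 4) >> 2) + 1 == 2.  If ni has wrapped to 0
   (2^32 latch executions in a 32-bit type), the naive 0 >> 2 would give
   0, whereas ((0u - 4u) >> 2) + 1 == 0x40000000, the correct ratio.  */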
5883 ratio_name
5884 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
5885 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
5886 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5887 ni_minus_gap_name,
5888 build_int_cst
5889 (TREE_TYPE (ni_name), vf)),
5890 log_vf),
5891 build_int_cst (TREE_TYPE (ni_name), 1));
5892 if (!is_gimple_val (ratio_name))
5894 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
5895 gimple *stmts = NULL;
5896 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
5897 gsi_insert_seq_on_edge_immediate (pe, stmts);
5899 *ratio_name_ptr = ratio_name;
5901 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5903 if (ratio_mult_vf_name_ptr)
5905 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5906 ratio_name, log_vf);
5907 if (!is_gimple_val (ratio_mult_vf_name))
5909 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
5910 gimple *stmts = NULL;
5911 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
5912 true, var);
5913 gsi_insert_seq_on_edge_immediate (pe, stmts);
5915 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5918 return;
5922 /* Function vect_transform_loop.
5924 The analysis phase has determined that the loop is vectorizable.
5925 Vectorize the loop - create vectorized stmts to replace the scalar
5926 stmts in the loop, and update the loop exit condition. */
5928 void
5929 vect_transform_loop (loop_vec_info loop_vinfo)
5931 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5932 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5933 int nbbs = loop->num_nodes;
5934 int i;
5935 tree ratio = NULL;
5936 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5937 bool grouped_store;
5938 bool slp_scheduled = false;
5939 gimple *stmt, *pattern_stmt;
5940 gimple_seq pattern_def_seq = NULL;
5941 gimple_stmt_iterator pattern_def_si = gsi_none ();
5942 bool transform_pattern_stmt = false;
5943 bool check_profitability = false;
5944 int th;
5945 /* Record number of iterations before we started tampering with the profile. */
5946 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5948 if (dump_enabled_p ())
5949 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5951 /* If the profile is imprecise, we have a chance to fix it up. */
5952 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5953 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5955 /* Use the more conservative vectorization threshold. If the number
5956 of iterations is constant assume the cost check has been performed
5957 by our caller. If the threshold makes all loops profitable that
5958 run at least the vectorization factor number of times checking
5959 is pointless, too. */
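/* Illustrative note (added example, not in the original sources): with a
   vectorization factor of 8, a cost-model threshold below 7 makes every
   loop that reaches the vector code profitable, so no runtime check is
   emitted; a threshold of, say, 20 combined with an unknown iteration
   count does trigger the runtime profitability check below.  */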
5960 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
5961 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5962 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_NOTE, vect_location,
5966 "Profitability threshold is %d loop iterations.\n",
5967 th);
5968 check_profitability = true;
5971 /* Version the loop first, if required, so the profitability check
5972 comes first. */
5974 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5975 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5977 vect_loop_versioning (loop_vinfo, th, check_profitability);
5978 check_profitability = false;
5981 tree ni_name = vect_build_loop_niters (loop_vinfo);
5982 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
5984 /* Peel the loop if there are data refs with unknown alignment.
5985 Only one data ref with unknown store is allowed. */
5987 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
5989 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
5990 th, check_profitability);
5991 check_profitability = false;
5992 /* The above adjusts LOOP_VINFO_NITERS, so ni_name must
5993 be re-computed. */
5994 ni_name = NULL_TREE;
5997 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5998 compile time constant), or it is a constant that is not divisible by the
5999 vectorization factor, then an epilog loop needs to be created.
6000 We therefore duplicate the loop: the original loop will be vectorized,
6001 and will compute the first (n/VF) iterations. The second copy of the loop
6002 will remain scalar and will compute the remaining (n%VF) iterations.
6003 (VF is the vectorization factor). */
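/* Illustrative note (added example, not in the original sources): with
   n == 1003 and VF == 4 the vectorized loop runs 1003/4 == 250 vector
   iterations, covering the first 1000 scalar iterations, and the scalar
   epilog loop executes the remaining 1003%4 == 3 iterations.  */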
6005 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6006 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6008 tree ratio_mult_vf;
6009 if (!ni_name)
6010 ni_name = vect_build_loop_niters (loop_vinfo);
6011 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6012 &ratio);
6013 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6014 th, check_profitability);
6016 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6017 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6018 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6019 else
6021 if (!ni_name)
6022 ni_name = vect_build_loop_niters (loop_vinfo);
6023 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6026 /* 1) Make sure the loop header has exactly two entries
6027 2) Make sure we have a preheader basic block. */
6029 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6031 split_edge (loop_preheader_edge (loop));
6033 /* FORNOW: the vectorizer supports only loops whose body consists
6034 of one basic block (header + empty latch). When the vectorizer
6035 supports more involved loop forms, the order in which the BBs are
6036 traversed will need to be reconsidered. */
6038 for (i = 0; i < nbbs; i++)
6040 basic_block bb = bbs[i];
6041 stmt_vec_info stmt_info;
6043 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6044 gsi_next (&si))
6046 gphi *phi = si.phi ();
6047 if (dump_enabled_p ())
6049 dump_printf_loc (MSG_NOTE, vect_location,
6050 "------>vectorizing phi: ");
6051 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6052 dump_printf (MSG_NOTE, "\n");
6054 stmt_info = vinfo_for_stmt (phi);
6055 if (!stmt_info)
6056 continue;
6058 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6059 vect_loop_kill_debug_uses (loop, phi);
6061 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6062 && !STMT_VINFO_LIVE_P (stmt_info))
6063 continue;
6065 if (STMT_VINFO_VECTYPE (stmt_info)
6066 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6067 != (unsigned HOST_WIDE_INT) vectorization_factor)
6068 && dump_enabled_p ())
6069 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6071 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6073 if (dump_enabled_p ())
6074 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6075 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6079 pattern_stmt = NULL;
6080 for (gimple_stmt_iterator si = gsi_start_bb (bb);
6081 !gsi_end_p (si) || transform_pattern_stmt;)
6083 bool is_store;
6085 if (transform_pattern_stmt)
6086 stmt = pattern_stmt;
6087 else
6089 stmt = gsi_stmt (si);
6090 /* During vectorization remove existing clobber stmts. */
6091 if (gimple_clobber_p (stmt))
6093 unlink_stmt_vdef (stmt);
6094 gsi_remove (&si, true);
6095 release_defs (stmt);
6096 continue;
6100 if (dump_enabled_p ())
6102 dump_printf_loc (MSG_NOTE, vect_location,
6103 "------>vectorizing statement: ");
6104 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6105 dump_printf (MSG_NOTE, "\n");
6108 stmt_info = vinfo_for_stmt (stmt);
6110 /* vector stmts created in the outer-loop during vectorization of
6111 stmts in an inner-loop may not have a stmt_info, and do not
6112 need to be vectorized. */
6113 if (!stmt_info)
6115 gsi_next (&si);
6116 continue;
6119 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6120 vect_loop_kill_debug_uses (loop, stmt);
6122 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6123 && !STMT_VINFO_LIVE_P (stmt_info))
6125 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6126 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6127 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6128 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6130 stmt = pattern_stmt;
6131 stmt_info = vinfo_for_stmt (stmt);
6133 else
6135 gsi_next (&si);
6136 continue;
6139 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6140 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6141 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6142 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6143 transform_pattern_stmt = true;
6145 /* If the pattern statement has def stmts, vectorize them too. */
6146 if (is_pattern_stmt_p (stmt_info))
6148 if (pattern_def_seq == NULL)
6150 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6151 pattern_def_si = gsi_start (pattern_def_seq);
6153 else if (!gsi_end_p (pattern_def_si))
6154 gsi_next (&pattern_def_si);
6155 if (pattern_def_seq != NULL)
6157 gimple *pattern_def_stmt = NULL;
6158 stmt_vec_info pattern_def_stmt_info = NULL;
6160 while (!gsi_end_p (pattern_def_si))
6162 pattern_def_stmt = gsi_stmt (pattern_def_si);
6163 pattern_def_stmt_info
6164 = vinfo_for_stmt (pattern_def_stmt);
6165 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6166 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6167 break;
6168 gsi_next (&pattern_def_si);
6171 if (!gsi_end_p (pattern_def_si))
6173 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_NOTE, vect_location,
6176 "==> vectorizing pattern def "
6177 "stmt: ");
6178 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6179 pattern_def_stmt, 0);
6180 dump_printf (MSG_NOTE, "\n");
6183 stmt = pattern_def_stmt;
6184 stmt_info = pattern_def_stmt_info;
6186 else
6188 pattern_def_si = gsi_none ();
6189 transform_pattern_stmt = false;
6192 else
6193 transform_pattern_stmt = false;
6196 if (STMT_VINFO_VECTYPE (stmt_info))
6198 unsigned int nunits
6199 = (unsigned int)
6200 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6201 if (!STMT_SLP_TYPE (stmt_info)
6202 && nunits != (unsigned int) vectorization_factor
6203 && dump_enabled_p ())
6204 /* For SLP, VF is set according to the unrolling factor, not
6205 to the vector size, hence this message does not apply to SLP. */
6206 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6209 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6210 reached. */
6211 if (STMT_SLP_TYPE (stmt_info))
6213 if (!slp_scheduled)
6215 slp_scheduled = true;
6217 if (dump_enabled_p ())
6218 dump_printf_loc (MSG_NOTE, vect_location,
6219 "=== scheduling SLP instances ===\n");
6221 vect_schedule_slp (loop_vinfo, NULL);
6224 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
6225 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6227 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6229 pattern_def_seq = NULL;
6230 gsi_next (&si);
6232 continue;
6236 /* -------- vectorize statement ------------ */
6237 if (dump_enabled_p ())
6238 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6240 grouped_store = false;
6241 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6242 if (is_store)
6244 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6246 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6247 interleaving chain was completed - free all the stores in
6248 the chain. */
6249 gsi_next (&si);
6250 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6252 else
6254 /* Free the attached stmt_vec_info and remove the stmt. */
6255 gimple *store = gsi_stmt (si);
6256 free_stmt_vec_info (store);
6257 unlink_stmt_vdef (store);
6258 gsi_remove (&si, true);
6259 release_defs (store);
6262 /* Stores can only appear at the end of pattern statements. */
6263 gcc_assert (!transform_pattern_stmt);
6264 pattern_def_seq = NULL;
6266 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6268 pattern_def_seq = NULL;
6269 gsi_next (&si);
6271 } /* stmts in BB */
6272 } /* BBs in loop */
6274 slpeel_make_loop_iterate_ntimes (loop, ratio);
6276 /* Reduce loop iterations by the vectorization factor. */
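/* Illustrative note (added example, not in the original sources): an
   iteration upper bound of 1000 with a vectorization factor of 4 becomes
   250 below; if peeling for gaps is in effect, one further iteration is
   subtracted, giving 249.  */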
6277 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6278 expected_iterations / vectorization_factor);
6279 loop->nb_iterations_upper_bound
6280 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6281 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6282 && loop->nb_iterations_upper_bound != 0)
6283 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6284 if (loop->any_estimate)
6286 loop->nb_iterations_estimate
6287 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6288 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6289 && loop->nb_iterations_estimate != 0)
6290 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6293 if (dump_enabled_p ())
6295 dump_printf_loc (MSG_NOTE, vect_location,
6296 "LOOP VECTORIZED\n");
6297 if (loop->inner)
6298 dump_printf_loc (MSG_NOTE, vect_location,
6299 "OUTER LOOP VECTORIZED\n");
6300 dump_printf (MSG_NOTE, "\n");