1 /* Loop Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "rtl.h"
31 #include "ssa.h"
32 #include "alias.h"
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "cfganal.h"
36 #include "gimple-pretty-print.h"
37 #include "internal-fn.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-pass.h"
45 #include "cfgloop.h"
46 #include "flags.h"
47 #include "insn-config.h"
48 #include "expmed.h"
49 #include "dojump.h"
50 #include "explow.h"
51 #include "calls.h"
52 #include "emit-rtl.h"
53 #include "varasm.h"
54 #include "stmt.h"
55 #include "expr.h"
56 #include "recog.h"
57 #include "insn-codes.h"
58 #include "optabs.h"
59 #include "params.h"
60 #include "diagnostic-core.h"
61 #include "tree-chrec.h"
62 #include "tree-scalar-evolution.h"
63 #include "tree-vectorizer.h"
64 #include "target.h"
66 /* Loop Vectorization Pass.
68 This pass tries to vectorize loops.
70 For example, the vectorizer transforms the following simple loop:
72 short a[N]; short b[N]; short c[N]; int i;
74 for (i=0; i<N; i++){
75 a[i] = b[i] + c[i];
78 as if it had been manually vectorized by rewriting the source code into:
80 typedef int __attribute__((mode(V8HI))) v8hi;
81 short a[N]; short b[N]; short c[N]; int i;
82 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
83 v8hi va, vb, vc;
85 for (i=0; i<N/8; i++){
86 vb = pb[i];
87 vc = pc[i];
88 va = vb + vc;
89 pa[i] = va;
92 The main entry to this pass is vectorize_loops(), in which
93 the vectorizer applies a set of analyses on a given set of loops,
94 followed by the actual vectorization transformation for the loops that
95 had successfully passed the analysis phase.
96 Throughout this pass we make a distinction between two types of
97 data: scalars (which are represented by SSA_NAMES), and memory references
98 ("data-refs"). These two types of data require different handling both
99 during analysis and transformation. The types of data-refs that the
100 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
101 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
102 accesses are required to have a simple (consecutive) access pattern.
104 Analysis phase:
105 ===============
106 The driver for the analysis phase is vect_analyze_loop().
107 It applies a set of analyses, some of which rely on the scalar evolution
108 analyzer (scev) developed by Sebastian Pop.
110 During the analysis phase the vectorizer records some information
111 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
112 loop, as well as general information about the loop as a whole, which is
113 recorded in a "loop_vec_info" struct attached to each loop.
115 Transformation phase:
116 =====================
117 The loop transformation phase scans all the stmts in the loop, and
118 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
119 the loop that needs to be vectorized. It inserts the vector code sequence
120 just before the scalar stmt S, and records a pointer to the vector code
121 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
122 attached to S). This pointer will be used for the vectorization of following
123 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
124 otherwise, we rely on dead code elimination for removing it.
126 For example, say stmt S1 was vectorized into stmt VS1:
128 VS1: vb = px[i];
129 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
130 S2: a = b;
132 To vectorize stmt S2, the vectorizer first finds the stmt that defines
133 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
134 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
135 resulting sequence would be:
137 VS1: vb = px[i];
138 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
139 VS2: va = vb;
140 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
142 Operands that are not SSA_NAMEs are data-refs that appear in
143 load/store operations (like 'x[i]' in S1), and are handled differently.
145 Target modeling:
146 =================
147 Currently the only target specific information that is used is the
148 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
149 Targets that can support different vector sizes will, for now, need
150 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
151 flexibility will be added in the future.
153 Since we only vectorize operations whose vector form can be
154 expressed using existing tree codes, to verify that an operation is
155 supported, the vectorizer checks the relevant optab at the relevant
156 machine_mode (e.g., optab_handler (add_optab, V8HImode)). If
157 the value found is CODE_FOR_nothing, then there's no target support, and
158 we can't vectorize the stmt (a sketch of this check follows this comment).
160 For additional information on this project see:
161 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
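/* Illustration only (a sketch of the optab check described above, not
   code used by this pass): asking whether the target can add two
   V8HImode vectors amounts to

     optab_handler (add_optab, V8HImode) != CODE_FOR_nothing

   where CODE_FOR_nothing means the target provides no instruction
   pattern for that operation in that mode, so such a stmt cannot be
   vectorized.  */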
164 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
166 /* Function vect_determine_vectorization_factor
168 Determine the vectorization factor (VF). VF is the number of data elements
169 that are operated upon in parallel in a single iteration of the vectorized
170 loop. For example, when vectorizing a loop that operates on 4-byte elements,
171 on a target with a vector size (VS) of 16 bytes, the VF is set to 4, since 4
172 elements can fit in a single vector register.
174 We currently support vectorization of loops in which all types operated upon
175 are of the same size. Therefore this function currently sets VF according to
176 the size of the types operated upon, and fails if there are multiple sizes
177 in the loop.
179 VF is also the factor by which the loop iterations are strip-mined, e.g.:
180 original loop:
181 for (i=0; i<N; i++){
182 a[i] = b[i] + c[i];
185 vectorized loop:
186 for (i=0; i<N; i+=VF){
187 a[i:VF] = b[i:VF] + c[i:VF];
191 static bool
192 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
195 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
196 int nbbs = loop->num_nodes;
197 unsigned int vectorization_factor = 0;
198 tree scalar_type;
199 gphi *phi;
200 tree vectype;
201 unsigned int nunits;
202 stmt_vec_info stmt_info;
203 int i;
204 HOST_WIDE_INT dummy;
205 gimple stmt, pattern_stmt = NULL;
206 gimple_seq pattern_def_seq = NULL;
207 gimple_stmt_iterator pattern_def_si = gsi_none ();
208 bool analyze_pattern_stmt = false;
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "=== vect_determine_vectorization_factor ===\n");
214 for (i = 0; i < nbbs; i++)
216 basic_block bb = bbs[i];
218 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
219 gsi_next (&si))
221 phi = si.phi ();
222 stmt_info = vinfo_for_stmt (phi);
223 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
226 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
227 dump_printf (MSG_NOTE, "\n");
230 gcc_assert (stmt_info);
232 if (STMT_VINFO_RELEVANT_P (stmt_info))
234 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
235 scalar_type = TREE_TYPE (PHI_RESULT (phi));
237 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "get vectype for scalar type: ");
241 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
242 dump_printf (MSG_NOTE, "\n");
245 vectype = get_vectype_for_scalar_type (scalar_type);
246 if (!vectype)
248 if (dump_enabled_p ())
250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
251 "not vectorized: unsupported "
252 "data-type ");
253 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
254 scalar_type);
255 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
257 return false;
259 STMT_VINFO_VECTYPE (stmt_info) = vectype;
261 if (dump_enabled_p ())
263 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
264 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
265 dump_printf (MSG_NOTE, "\n");
268 nunits = TYPE_VECTOR_SUBPARTS (vectype);
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
271 nunits);
273 if (!vectorization_factor
274 || (nunits > vectorization_factor))
275 vectorization_factor = nunits;
279 for (gimple_stmt_iterator si = gsi_start_bb (bb);
280 !gsi_end_p (si) || analyze_pattern_stmt;)
282 tree vf_vectype;
284 if (analyze_pattern_stmt)
285 stmt = pattern_stmt;
286 else
287 stmt = gsi_stmt (si);
289 stmt_info = vinfo_for_stmt (stmt);
291 if (dump_enabled_p ())
293 dump_printf_loc (MSG_NOTE, vect_location,
294 "==> examining statement: ");
295 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
296 dump_printf (MSG_NOTE, "\n");
299 gcc_assert (stmt_info);
301 /* Skip stmts which do not need to be vectorized. */
302 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
303 && !STMT_VINFO_LIVE_P (stmt_info))
304 || gimple_clobber_p (stmt))
306 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
307 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
308 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
309 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
311 stmt = pattern_stmt;
312 stmt_info = vinfo_for_stmt (pattern_stmt);
313 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "==> examining pattern statement: ");
317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
318 dump_printf (MSG_NOTE, "\n");
321 else
323 if (dump_enabled_p ())
324 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
325 gsi_next (&si);
326 continue;
329 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
330 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
331 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
332 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
333 analyze_pattern_stmt = true;
335 /* If a pattern statement has def stmts, analyze them too. */
336 if (is_pattern_stmt_p (stmt_info))
338 if (pattern_def_seq == NULL)
340 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
341 pattern_def_si = gsi_start (pattern_def_seq);
343 else if (!gsi_end_p (pattern_def_si))
344 gsi_next (&pattern_def_si);
345 if (pattern_def_seq != NULL)
347 gimple pattern_def_stmt = NULL;
348 stmt_vec_info pattern_def_stmt_info = NULL;
350 while (!gsi_end_p (pattern_def_si))
352 pattern_def_stmt = gsi_stmt (pattern_def_si);
353 pattern_def_stmt_info
354 = vinfo_for_stmt (pattern_def_stmt);
355 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
356 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
357 break;
358 gsi_next (&pattern_def_si);
361 if (!gsi_end_p (pattern_def_si))
363 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "==> examining pattern def stmt: ");
367 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
368 pattern_def_stmt, 0);
369 dump_printf (MSG_NOTE, "\n");
372 stmt = pattern_def_stmt;
373 stmt_info = pattern_def_stmt_info;
375 else
377 pattern_def_si = gsi_none ();
378 analyze_pattern_stmt = false;
381 else
382 analyze_pattern_stmt = false;
385 if (gimple_get_lhs (stmt) == NULL_TREE
386 /* MASK_STORE has no lhs, but is ok. */
387 && (!is_gimple_call (stmt)
388 || !gimple_call_internal_p (stmt)
389 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
391 if (is_gimple_call (stmt))
393 /* Ignore calls with no lhs. These must be calls to
394 #pragma omp simd functions, and what vectorization factor
395 it really needs can't be determined until
396 vectorizable_simd_clone_call. */
397 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
399 pattern_def_seq = NULL;
400 gsi_next (&si);
402 continue;
404 if (dump_enabled_p ())
406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
407 "not vectorized: irregular stmt.");
408 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
410 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
412 return false;
415 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
417 if (dump_enabled_p ())
419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
420 "not vectorized: vector stmt in loop:");
421 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
422 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
424 return false;
427 if (STMT_VINFO_VECTYPE (stmt_info))
429 /* The only case when a vectype has already been set is for stmts
430 that contain a dataref, or for "pattern-stmts" (stmts
431 generated by the vectorizer to represent/replace a certain
432 idiom). */
433 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
434 || is_pattern_stmt_p (stmt_info)
435 || !gsi_end_p (pattern_def_si));
436 vectype = STMT_VINFO_VECTYPE (stmt_info);
438 else
440 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
441 if (is_gimple_call (stmt)
442 && gimple_call_internal_p (stmt)
443 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
444 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
445 else
446 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
447 if (dump_enabled_p ())
449 dump_printf_loc (MSG_NOTE, vect_location,
450 "get vectype for scalar type: ");
451 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
452 dump_printf (MSG_NOTE, "\n");
454 vectype = get_vectype_for_scalar_type (scalar_type);
455 if (!vectype)
457 if (dump_enabled_p ())
459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
460 "not vectorized: unsupported "
461 "data-type ");
462 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
463 scalar_type);
464 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
466 return false;
469 STMT_VINFO_VECTYPE (stmt_info) = vectype;
471 if (dump_enabled_p ())
473 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
474 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
475 dump_printf (MSG_NOTE, "\n");
479 /* The vectorization factor is determined by the smallest
480 scalar type (or the largest vector size, but we only
481 support one vector size per loop). */
482 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
483 &dummy);
484 if (dump_enabled_p ())
486 dump_printf_loc (MSG_NOTE, vect_location,
487 "get vectype for scalar type: ");
488 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
489 dump_printf (MSG_NOTE, "\n");
491 vf_vectype = get_vectype_for_scalar_type (scalar_type);
492 if (!vf_vectype)
494 if (dump_enabled_p ())
496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
497 "not vectorized: unsupported data-type ");
498 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
499 scalar_type);
500 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
502 return false;
505 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
506 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
508 if (dump_enabled_p ())
510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
511 "not vectorized: different sized vector "
512 "types in statement, ");
513 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
514 vectype);
515 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
516 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
517 vf_vectype);
518 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
520 return false;
523 if (dump_enabled_p ())
525 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
526 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
527 dump_printf (MSG_NOTE, "\n");
530 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
531 if (dump_enabled_p ())
532 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
533 if (!vectorization_factor
534 || (nunits > vectorization_factor))
535 vectorization_factor = nunits;
537 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
539 pattern_def_seq = NULL;
540 gsi_next (&si);
545 /* TODO: Analyze cost. Decide if worth while to vectorize. */
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
548 vectorization_factor);
549 if (vectorization_factor <= 1)
551 if (dump_enabled_p ())
552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
553 "not vectorized: unsupported data-type\n");
554 return false;
556 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
558 return true;
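/* Hypothetical example of the "smallest scalar type" rule used above,
   assuming 16-byte vectors:

     short *s; int *d;
     for (i = 0; i < N; i++)
       d[i] = s[i];

   the smallest scalar type in the loop is 'short', whose vectype
   (V8HI) holds 8 units, while 'int' yields V4SI with only 4; the
   vectorization factor becomes 8, the maximum nunits seen, and each
   vector of 8 shorts is widened into two vectors of 4 ints.  */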
562 /* Function vect_is_simple_iv_evolution.
564 FORNOW: A simple evolution of an induction variable in the loop is
565 considered a polynomial evolution. */
567 static bool
568 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
569 tree * step)
571 tree init_expr;
572 tree step_expr;
573 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
574 basic_block bb;
576 /* When there is no evolution in this loop, the evolution function
577 is not "simple". */
578 if (evolution_part == NULL_TREE)
579 return false;
581 /* When the evolution is a polynomial of degree >= 2
582 the evolution function is not "simple". */
583 if (tree_is_chrec (evolution_part))
584 return false;
586 step_expr = evolution_part;
587 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
589 if (dump_enabled_p ())
591 dump_printf_loc (MSG_NOTE, vect_location, "step: ");
592 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
593 dump_printf (MSG_NOTE, ", init: ");
594 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
595 dump_printf (MSG_NOTE, "\n");
598 *init = init_expr;
599 *step = step_expr;
601 if (TREE_CODE (step_expr) != INTEGER_CST
602 && (TREE_CODE (step_expr) != SSA_NAME
603 || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
604 && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
605 || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
606 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
607 || !flag_associative_math)))
608 && (TREE_CODE (step_expr) != REAL_CST
609 || !flag_associative_math))
611 if (dump_enabled_p ())
612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
613 "step unknown.\n");
614 return false;
617 return true;
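/* Illustration (an assumed example, not taken from this file): for

     for (i = 0; i < n; i++)
       p = p + 4;

   the scalar evolution of p is the chrec {p_0, +, 4}_1: the evolution
   part returned in *STEP is the constant 4 and the initial condition
   returned in *INIT is p_0.  By contrast, a variable accumulating the
   IV itself (s += i) has the step {i_0, +, 1}_1, which is itself a
   chrec and is therefore rejected as not "simple" above.  */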
620 /* Function vect_analyze_scalar_cycles_1.
622 Examine the cross iteration def-use cycles of scalar variables
623 in LOOP. LOOP_VINFO represents the loop that is now being
624 considered for vectorization (can be LOOP, or an outer-loop
625 enclosing LOOP). */
627 static void
628 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
630 basic_block bb = loop->header;
631 tree init, step;
632 auto_vec<gimple, 64> worklist;
633 gphi_iterator gsi;
634 bool double_reduc;
636 if (dump_enabled_p ())
637 dump_printf_loc (MSG_NOTE, vect_location,
638 "=== vect_analyze_scalar_cycles ===\n");
640 /* First - identify all inductions. Reduction detection assumes that all the
641 inductions have been identified, therefore, this order must not be
642 changed. */
643 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
645 gphi *phi = gsi.phi ();
646 tree access_fn = NULL;
647 tree def = PHI_RESULT (phi);
648 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
650 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
654 dump_printf (MSG_NOTE, "\n");
657 /* Skip virtual phi's. The data dependences that are associated with
658 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
659 if (virtual_operand_p (def))
660 continue;
662 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
664 /* Analyze the evolution function. */
665 access_fn = analyze_scalar_evolution (loop, def);
666 if (access_fn)
668 STRIP_NOPS (access_fn);
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location,
672 "Access function of PHI: ");
673 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
674 dump_printf (MSG_NOTE, "\n");
676 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
677 = evolution_part_in_loop_num (access_fn, loop->num);
680 if (!access_fn
681 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
682 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
683 && TREE_CODE (step) != INTEGER_CST))
685 worklist.safe_push (phi);
686 continue;
689 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
693 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
697 /* Second - identify all reductions and nested cycles. */
698 while (worklist.length () > 0)
700 gimple phi = worklist.pop ();
701 tree def = PHI_RESULT (phi);
702 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
703 gimple reduc_stmt;
704 bool nested_cycle;
706 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
709 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
710 dump_printf (MSG_NOTE, "\n");
713 gcc_assert (!virtual_operand_p (def)
714 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
716 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
717 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
718 &double_reduc, false);
719 if (reduc_stmt)
721 if (double_reduc)
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "Detected double reduction.\n");
727 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
728 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
729 vect_double_reduction_def;
731 else
733 if (nested_cycle)
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location,
737 "Detected vectorizable nested cycle.\n");
739 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
740 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
741 vect_nested_cycle;
743 else
745 if (dump_enabled_p ())
746 dump_printf_loc (MSG_NOTE, vect_location,
747 "Detected reduction.\n");
749 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
750 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
751 vect_reduction_def;
752 /* Store the reduction cycles for possible vectorization in
753 loop-aware SLP. */
754 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
758 else
759 if (dump_enabled_p ())
760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
761 "Unknown def-use cycle pattern.\n");
766 /* Function vect_analyze_scalar_cycles.
768 Examine the cross iteration def-use cycles of scalar variables, by
769 analyzing the loop-header PHIs of scalar variables. Classify each
770 cycle as one of the following: invariant, induction, reduction, unknown.
771 We do that for the loop represented by LOOP_VINFO, and also for its
772 inner-loop, if it exists.
773 Examples for scalar cycles:
775 Example1: reduction:
777 loop1:
778 for (i=0; i<N; i++)
779 sum += a[i];
781 Example2: induction:
783 loop2:
784 for (i=0; i<N; i++)
785 a[i] = i; */
787 static void
788 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
790 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
792 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
794 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
795 Reductions in such inner-loop therefore have different properties than
796 the reductions in the nest that gets vectorized:
797 1. When vectorized, they are executed in the same order as in the original
798 scalar loop, so we can't change the order of computation when
799 vectorizing them.
800 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
801 current checks are too strict. */
803 if (loop->inner)
804 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
807 /* Transfer group and reduction information from STMT to its pattern stmt. */
809 static void
810 vect_fixup_reduc_chain (gimple stmt)
812 gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
813 gimple stmtp;
814 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
815 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
816 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
819 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
820 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
821 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
822 if (stmt)
823 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
824 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
826 while (stmt);
827 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
830 /* Fixup scalar cycles that now have their stmts detected as patterns. */
832 static void
833 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
835 gimple first;
836 unsigned i;
838 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
839 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
841 vect_fixup_reduc_chain (first);
842 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
843 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
847 /* Function vect_get_loop_niters.
849 Determine how many iterations the loop executes and place it
850 in NUMBER_OF_ITERATIONS. Place the number of latch iterations
851 in NUMBER_OF_ITERATIONSM1.
853 Return the loop exit condition. */
856 static gcond *
857 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
858 tree *number_of_iterationsm1)
860 tree niters;
862 if (dump_enabled_p ())
863 dump_printf_loc (MSG_NOTE, vect_location,
864 "=== get_loop_niters ===\n");
866 niters = number_of_latch_executions (loop);
867 *number_of_iterationsm1 = niters;
869 /* We want the number of loop header executions which is the number
870 of latch executions plus one.
871 ??? For UINT_MAX latch executions this number overflows to zero
872 for loops like do { n++; } while (n != 0); */
873 if (niters && !chrec_contains_undetermined (niters))
874 niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
875 build_int_cst (TREE_TYPE (niters), 1));
876 *number_of_iterations = niters;
878 return get_loop_exit_condition (loop);
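/* Worked example (illustration only): a loop whose body runs for
   i = 0, 1, 2, 3 takes the latch edge 3 times while the header
   executes 4 times, so *NUMBER_OF_ITERATIONSM1 is 3 and
   *NUMBER_OF_ITERATIONS is 3 + 1 = 4.  The ??? above notes the corner
   case in which this "+ 1" wraps around to zero.  */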
882 /* Function bb_in_loop_p
884 Used as predicate for dfs order traversal of the loop bbs. */
886 static bool
887 bb_in_loop_p (const_basic_block bb, const void *data)
889 const struct loop *const loop = (const struct loop *)data;
890 if (flow_bb_inside_loop_p (loop, bb))
891 return true;
892 return false;
896 /* Function new_loop_vec_info.
898 Create and initialize a new loop_vec_info struct for LOOP, as well as
899 stmt_vec_info structs for all the stmts in LOOP. */
901 static loop_vec_info
902 new_loop_vec_info (struct loop *loop)
904 loop_vec_info res;
905 basic_block *bbs;
906 gimple_stmt_iterator si;
907 unsigned int i, nbbs;
909 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
910 LOOP_VINFO_LOOP (res) = loop;
912 bbs = get_loop_body (loop);
914 /* Create/Update stmt_info for all stmts in the loop. */
915 for (i = 0; i < loop->num_nodes; i++)
917 basic_block bb = bbs[i];
919 /* BBs in a nested inner-loop will have been already processed (because
920 we will have called vect_analyze_loop_form for any nested inner-loop).
921 Therefore, for stmts in an inner-loop we just want to update the
922 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
923 loop_info of the outer-loop we are currently considering to vectorize
924 (instead of the loop_info of the inner-loop).
925 For stmts in other BBs we need to create a stmt_info from scratch. */
926 if (bb->loop_father != loop)
928 /* Inner-loop bb. */
929 gcc_assert (loop->inner && bb->loop_father == loop->inner);
930 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
932 gimple phi = gsi_stmt (si);
933 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
934 loop_vec_info inner_loop_vinfo =
935 STMT_VINFO_LOOP_VINFO (stmt_info);
936 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
937 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
939 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
941 gimple stmt = gsi_stmt (si);
942 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
943 loop_vec_info inner_loop_vinfo =
944 STMT_VINFO_LOOP_VINFO (stmt_info);
945 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
946 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
949 else
951 /* bb in current nest. */
952 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
954 gimple phi = gsi_stmt (si);
955 gimple_set_uid (phi, 0);
956 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
959 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
961 gimple stmt = gsi_stmt (si);
962 gimple_set_uid (stmt, 0);
963 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
968 /* CHECKME: We want to visit all BBs before their successors (except for
969 latch blocks, for which this assertion wouldn't hold). In the simple
970 case of the loop forms we allow, a dfs order of the BBs would be the same
971 as reversed postorder traversal, so we are safe. */
973 free (bbs);
974 bbs = XCNEWVEC (basic_block, loop->num_nodes);
975 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
976 bbs, loop->num_nodes, loop);
977 gcc_assert (nbbs == loop->num_nodes);
979 LOOP_VINFO_BBS (res) = bbs;
980 LOOP_VINFO_NITERSM1 (res) = NULL;
981 LOOP_VINFO_NITERS (res) = NULL;
982 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
983 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
984 LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
985 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
986 LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
987 LOOP_VINFO_VECT_FACTOR (res) = 0;
988 LOOP_VINFO_LOOP_NEST (res).create (3);
989 LOOP_VINFO_DATAREFS (res).create (10);
990 LOOP_VINFO_DDRS (res).create (10 * 10);
991 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
992 LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
993 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
994 LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
995 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
996 LOOP_VINFO_GROUPED_STORES (res).create (10);
997 LOOP_VINFO_REDUCTIONS (res).create (10);
998 LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
999 LOOP_VINFO_SLP_INSTANCES (res).create (10);
1000 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1001 LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1002 LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1003 LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1004 LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1006 return res;
1010 /* Function destroy_loop_vec_info.
1012 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1013 stmts in the loop. */
1015 void
1016 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1018 struct loop *loop;
1019 basic_block *bbs;
1020 int nbbs;
1021 gimple_stmt_iterator si;
1022 int j;
1023 vec<slp_instance> slp_instances;
1024 slp_instance instance;
1025 bool swapped;
1027 if (!loop_vinfo)
1028 return;
1030 loop = LOOP_VINFO_LOOP (loop_vinfo);
1032 bbs = LOOP_VINFO_BBS (loop_vinfo);
1033 nbbs = clean_stmts ? loop->num_nodes : 0;
1034 swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1036 for (j = 0; j < nbbs; j++)
1038 basic_block bb = bbs[j];
1039 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1040 free_stmt_vec_info (gsi_stmt (si));
1042 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1044 gimple stmt = gsi_stmt (si);
1046 /* We may have broken canonical form by moving a constant
1047 into RHS1 of a commutative op. Fix such occurrences. */
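/* For instance (illustration only): an assignment whose operands were
   swapped during analysis and left as  _1 = 5 + x_2  is rewritten back
   to the canonical  _1 = x_2 + 5  before its stmt_vec_info is freed.  */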
1048 if (swapped && is_gimple_assign (stmt))
1050 enum tree_code code = gimple_assign_rhs_code (stmt);
1052 if ((code == PLUS_EXPR
1053 || code == POINTER_PLUS_EXPR
1054 || code == MULT_EXPR)
1055 && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1056 swap_ssa_operands (stmt,
1057 gimple_assign_rhs1_ptr (stmt),
1058 gimple_assign_rhs2_ptr (stmt));
1061 /* Free stmt_vec_info. */
1062 free_stmt_vec_info (stmt);
1063 gsi_next (&si);
1067 free (LOOP_VINFO_BBS (loop_vinfo));
1068 vect_destroy_datarefs (loop_vinfo, NULL);
1069 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1070 LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1071 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1072 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1073 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074 FOR_EACH_VEC_ELT (slp_instances, j, instance)
1075 vect_free_slp_instance (instance);
1077 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1078 LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1079 LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1080 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1082 delete LOOP_VINFO_PEELING_HTAB (loop_vinfo);
1083 LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL;
1085 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1086 loop_vinfo->scalar_cost_vec.release ();
1088 free (loop_vinfo);
1089 loop->aux = NULL;
1093 /* Calculate the cost of one scalar iteration of the loop. */
1094 static void
1095 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1097 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1098 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1099 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1100 int innerloop_iters, i;
1102 /* Count statements in scalar loop. Using this as scalar cost for a single
1103 iteration for now.
1105 TODO: Add outer loop support.
1107 TODO: Consider assigning different costs to different scalar
1108 statements. */
1110 /* FORNOW. */
1111 innerloop_iters = 1;
1112 if (loop->inner)
1113 innerloop_iters = 50; /* FIXME */
1115 for (i = 0; i < nbbs; i++)
1117 gimple_stmt_iterator si;
1118 basic_block bb = bbs[i];
1120 if (bb->loop_father == loop->inner)
1121 factor = innerloop_iters;
1122 else
1123 factor = 1;
1125 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1127 gimple stmt = gsi_stmt (si);
1128 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1130 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1131 continue;
1133 /* Skip stmts that are not vectorized inside the loop. */
1134 if (stmt_info
1135 && !STMT_VINFO_RELEVANT_P (stmt_info)
1136 && (!STMT_VINFO_LIVE_P (stmt_info)
1137 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1138 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1139 continue;
1141 vect_cost_for_stmt kind;
1142 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1144 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1145 kind = scalar_load;
1146 else
1147 kind = scalar_store;
1149 else
1150 kind = scalar_stmt;
1152 scalar_single_iter_cost
1153 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1154 factor, kind, NULL, 0, vect_prologue);
1157 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1158 = scalar_single_iter_cost;
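/* Hypothetical example of the counting above: an outer-loop body with
   three relevant scalar statements plus an inner loop containing two
   relevant statements contributes counts of 3 * 1 + 2 * 50 = 103,
   using the FIXME factor of 50 for inner-loop statements; each count
   is then scaled by the target's per-kind cost in record_stmt_cost.  */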
1162 /* Function vect_analyze_loop_1.
1164 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1165 for it. The different analyses will record information in the
1166 loop_vec_info struct. This is a subset of the analyses applied in
1167 vect_analyze_loop, to be applied on an inner-loop nested in the loop
1168 that is now considered for (outer-loop) vectorization. */
1170 static loop_vec_info
1171 vect_analyze_loop_1 (struct loop *loop)
1173 loop_vec_info loop_vinfo;
1175 if (dump_enabled_p ())
1176 dump_printf_loc (MSG_NOTE, vect_location,
1177 "===== analyze_loop_nest_1 =====\n");
1179 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
1181 loop_vinfo = vect_analyze_loop_form (loop);
1182 if (!loop_vinfo)
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1186 "bad inner-loop form.\n");
1187 return NULL;
1190 return loop_vinfo;
1194 /* Function vect_analyze_loop_form.
1196 Verify that certain CFG restrictions hold, including:
1197 - the loop has a pre-header
1198 - the loop has a single entry and exit
1199 - the loop exit condition is simple enough, and the number of iterations
1200 can be analyzed (a countable loop). */
1202 loop_vec_info
1203 vect_analyze_loop_form (struct loop *loop)
1205 loop_vec_info loop_vinfo;
1206 gcond *loop_cond;
1207 tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
1208 loop_vec_info inner_loop_vinfo = NULL;
1210 if (dump_enabled_p ())
1211 dump_printf_loc (MSG_NOTE, vect_location,
1212 "=== vect_analyze_loop_form ===\n");
1214 /* Different restrictions apply when we are considering an inner-most loop,
1215 vs. an outer (nested) loop.
1216 (FORNOW. May want to relax some of these restrictions in the future). */
1218 if (!loop->inner)
1220 /* Inner-most loop. We currently require that the number of BBs is
1221 exactly 2 (the header and latch). Vectorizable inner-most loops
1222 look like this:
1224 (pre-header)
1226 header <--------+
1227 | | |
1228 | +--> latch --+
1230 (exit-bb) */
1232 if (loop->num_nodes != 2)
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1236 "not vectorized: control flow in loop.\n");
1237 return NULL;
1240 if (empty_block_p (loop->header))
1242 if (dump_enabled_p ())
1243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1244 "not vectorized: empty loop.\n");
1245 return NULL;
1248 else
1250 struct loop *innerloop = loop->inner;
1251 edge entryedge;
1253 /* Nested loop. We currently require that the loop is doubly-nested,
1254 contains a single inner loop, and the number of BBs is exactly 5.
1255 Vectorizable outer-loops look like this:
1257 (pre-header)
1259 header <---+
1261 inner-loop |
1263 tail ------+
1265 (exit-bb)
1267 The inner-loop has the properties expected of inner-most loops
1268 as described above. */
1270 if ((loop->inner)->inner || (loop->inner)->next)
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1274 "not vectorized: multiple nested loops.\n");
1275 return NULL;
1278 /* Analyze the inner-loop. */
1279 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1280 if (!inner_loop_vinfo)
1282 if (dump_enabled_p ())
1283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1284 "not vectorized: Bad inner loop.\n");
1285 return NULL;
1288 if (!expr_invariant_in_loop_p (loop,
1289 LOOP_VINFO_NITERS (inner_loop_vinfo)))
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1293 "not vectorized: inner-loop count not"
1294 " invariant.\n");
1295 destroy_loop_vec_info (inner_loop_vinfo, true);
1296 return NULL;
1299 if (loop->num_nodes != 5)
1301 if (dump_enabled_p ())
1302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1303 "not vectorized: control flow in loop.\n");
1304 destroy_loop_vec_info (inner_loop_vinfo, true);
1305 return NULL;
1308 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1309 entryedge = EDGE_PRED (innerloop->header, 0);
1310 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1311 entryedge = EDGE_PRED (innerloop->header, 1);
1313 if (entryedge->src != loop->header
1314 || !single_exit (innerloop)
1315 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1317 if (dump_enabled_p ())
1318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1319 "not vectorized: unsupported outerloop form.\n");
1320 destroy_loop_vec_info (inner_loop_vinfo, true);
1321 return NULL;
1324 if (dump_enabled_p ())
1325 dump_printf_loc (MSG_NOTE, vect_location,
1326 "Considering outer-loop vectorization.\n");
1329 if (!single_exit (loop)
1330 || EDGE_COUNT (loop->header->preds) != 2)
1332 if (dump_enabled_p ())
1334 if (!single_exit (loop))
1335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1336 "not vectorized: multiple exits.\n");
1337 else if (EDGE_COUNT (loop->header->preds) != 2)
1338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1339 "not vectorized: too many incoming edges.\n");
1341 if (inner_loop_vinfo)
1342 destroy_loop_vec_info (inner_loop_vinfo, true);
1343 return NULL;
1346 /* We assume that the loop exit condition is at the end of the loop, i.e.,
1347 that the loop is represented as a do-while (with a proper if-guard
1348 before the loop if needed), where the loop header contains all the
1349 executable statements, and the latch is empty. */
1350 if (!empty_block_p (loop->latch)
1351 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1353 if (dump_enabled_p ())
1354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1355 "not vectorized: latch block not empty.\n");
1356 if (inner_loop_vinfo)
1357 destroy_loop_vec_info (inner_loop_vinfo, true);
1358 return NULL;
1361 /* Make sure there exists a single-predecessor exit bb: */
1362 if (!single_pred_p (single_exit (loop)->dest))
1364 edge e = single_exit (loop);
1365 if (!(e->flags & EDGE_ABNORMAL))
1367 split_loop_exit_edge (e);
1368 if (dump_enabled_p ())
1369 dump_printf (MSG_NOTE, "split exit edge.\n");
1371 else
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1375 "not vectorized: abnormal loop exit edge.\n");
1376 if (inner_loop_vinfo)
1377 destroy_loop_vec_info (inner_loop_vinfo, true);
1378 return NULL;
1382 loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
1383 &number_of_iterationsm1);
1384 if (!loop_cond)
1386 if (dump_enabled_p ())
1387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1388 "not vectorized: complicated exit condition.\n");
1389 if (inner_loop_vinfo)
1390 destroy_loop_vec_info (inner_loop_vinfo, true);
1391 return NULL;
1394 if (!number_of_iterations
1395 || chrec_contains_undetermined (number_of_iterations))
1397 if (dump_enabled_p ())
1398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1399 "not vectorized: number of iterations cannot be "
1400 "computed.\n");
1401 if (inner_loop_vinfo)
1402 destroy_loop_vec_info (inner_loop_vinfo, true);
1403 return NULL;
1406 if (integer_zerop (number_of_iterations))
1408 if (dump_enabled_p ())
1409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1410 "not vectorized: number of iterations = 0.\n");
1411 if (inner_loop_vinfo)
1412 destroy_loop_vec_info (inner_loop_vinfo, true);
1413 return NULL;
1416 loop_vinfo = new_loop_vec_info (loop);
1417 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1418 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1419 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1421 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1423 if (dump_enabled_p ())
1425 dump_printf_loc (MSG_NOTE, vect_location,
1426 "Symbolic number of iterations is ");
1427 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1428 dump_printf (MSG_NOTE, "\n");
1432 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1434 /* CHECKME: May want to keep it around it in the future. */
1435 if (inner_loop_vinfo)
1436 destroy_loop_vec_info (inner_loop_vinfo, false);
1438 gcc_assert (!loop->aux);
1439 loop->aux = loop_vinfo;
1440 return loop_vinfo;
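/* Hypothetical examples of the form restrictions checked above: an
   inner-most loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   has exactly the header + latch structure required, whereas a body
   containing a branch, e.g.

     for (i = 0; i < n; i++)
       if (b[i] > 0)
         a[i] = b[i];

   introduces extra basic blocks and is rejected here as "control flow
   in loop" unless if-conversion has flattened the branch earlier.  */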
1443 /* Scan the loop stmts and, depending on whether there are any (non-)SLP
1444 statements, update the vectorization factor. */
1446 static void
1447 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1449 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1450 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1451 int nbbs = loop->num_nodes;
1452 unsigned int vectorization_factor;
1453 int i;
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_NOTE, vect_location,
1457 "=== vect_update_vf_for_slp ===\n");
1459 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1460 gcc_assert (vectorization_factor != 0);
1462 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1463 the vectorization factor of the loop is the unrolling factor required by
1464 the SLP instances. If that unrolling factor is 1, we say that we
1465 perform pure SLP on the loop - cross-iteration parallelism is not
1466 exploited. */
1467 bool only_slp_in_loop = true;
1468 for (i = 0; i < nbbs; i++)
1470 basic_block bb = bbs[i];
1471 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1472 gsi_next (&si))
1474 gimple stmt = gsi_stmt (si);
1475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1476 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1477 && STMT_VINFO_RELATED_STMT (stmt_info))
1479 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1480 stmt_info = vinfo_for_stmt (stmt);
1482 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1483 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1484 && !PURE_SLP_STMT (stmt_info))
1485 /* STMT needs both SLP and loop-based vectorization. */
1486 only_slp_in_loop = false;
1490 if (only_slp_in_loop)
1491 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1492 else
1493 vectorization_factor
1494 = least_common_multiple (vectorization_factor,
1495 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1497 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE, vect_location,
1500 "Updating vectorization factor to %d\n",
1501 vectorization_factor);
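/* Worked example (hypothetical numbers): if the loop's vectorization
   factor was 4 but an SLP instance requires an unrolling factor of 8,
   a hybrid loop gets VF = least_common_multiple (4, 8) = 8; if every
   statement is covered by SLP ("pure SLP"), the VF is simply the SLP
   unrolling factor.  */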
1504 /* Function vect_analyze_loop_operations.
1506 Scan the loop stmts and make sure they are all vectorizable. */
1508 static bool
1509 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1512 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1513 int nbbs = loop->num_nodes;
1514 unsigned int vectorization_factor;
1515 int i;
1516 stmt_vec_info stmt_info;
1517 bool need_to_vectorize = false;
1518 int min_profitable_iters;
1519 int min_scalar_loop_bound;
1520 unsigned int th;
1521 bool ok;
1522 HOST_WIDE_INT max_niter;
1523 HOST_WIDE_INT estimated_niter;
1524 int min_profitable_estimate;
1526 if (dump_enabled_p ())
1527 dump_printf_loc (MSG_NOTE, vect_location,
1528 "=== vect_analyze_loop_operations ===\n");
1530 for (i = 0; i < nbbs; i++)
1532 basic_block bb = bbs[i];
1534 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1535 gsi_next (&si))
1537 gphi *phi = si.phi ();
1538 ok = true;
1540 stmt_info = vinfo_for_stmt (phi);
1541 if (dump_enabled_p ())
1543 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1544 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1545 dump_printf (MSG_NOTE, "\n");
1548 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1549 (i.e., a phi in the tail of the outer-loop). */
1550 if (! is_loop_header_bb_p (bb))
1552 /* FORNOW: we currently don't support the case that these phis
1553 are not used in the outer loop (unless it is a double reduction,
1554 i.e., this phi is vect_reduction_def), because this case
1555 requires us to actually do something here. */
1556 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1557 || STMT_VINFO_LIVE_P (stmt_info))
1558 && STMT_VINFO_DEF_TYPE (stmt_info)
1559 != vect_double_reduction_def)
1561 if (dump_enabled_p ())
1562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1563 "Unsupported loop-closed phi in "
1564 "outer-loop.\n");
1565 return false;
1568 /* If PHI is used in the outer loop, we check that its operand
1569 is defined in the inner loop. */
1570 if (STMT_VINFO_RELEVANT_P (stmt_info))
1572 tree phi_op;
1573 gimple op_def_stmt;
1575 if (gimple_phi_num_args (phi) != 1)
1576 return false;
1578 phi_op = PHI_ARG_DEF (phi, 0);
1579 if (TREE_CODE (phi_op) != SSA_NAME)
1580 return false;
1582 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1583 if (gimple_nop_p (op_def_stmt)
1584 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1585 || !vinfo_for_stmt (op_def_stmt))
1586 return false;
1588 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1589 != vect_used_in_outer
1590 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1591 != vect_used_in_outer_by_reduction)
1592 return false;
1595 continue;
1598 gcc_assert (stmt_info);
1600 if (STMT_VINFO_LIVE_P (stmt_info))
1602 /* FORNOW: not yet supported. */
1603 if (dump_enabled_p ())
1604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1605 "not vectorized: value used after loop.\n");
1606 return false;
1609 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1610 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1612 /* A scalar-dependence cycle that we don't support. */
1613 if (dump_enabled_p ())
1614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1615 "not vectorized: scalar dependence cycle.\n");
1616 return false;
1619 if (STMT_VINFO_RELEVANT_P (stmt_info))
1621 need_to_vectorize = true;
1622 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1623 ok = vectorizable_induction (phi, NULL, NULL);
1626 if (!ok)
1628 if (dump_enabled_p ())
1630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1631 "not vectorized: relevant phi not "
1632 "supported: ");
1633 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1634 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1636 return false;
1640 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1641 gsi_next (&si))
1643 gimple stmt = gsi_stmt (si);
1644 if (!gimple_clobber_p (stmt)
1645 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1646 return false;
1648 } /* bbs */
1650 /* All operations in the loop are either irrelevant (they deal with loop
1651 control, or are dead), or only used outside the loop and can be moved
1652 out of the loop (e.g. invariants, inductions). The loop can be
1653 optimized away by scalar optimizations. We're better off not
1654 touching this loop. */
1655 if (!need_to_vectorize)
1657 if (dump_enabled_p ())
1658 dump_printf_loc (MSG_NOTE, vect_location,
1659 "All the computation can be taken out of the loop.\n");
1660 if (dump_enabled_p ())
1661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1662 "not vectorized: redundant loop. no profit to "
1663 "vectorize.\n");
1664 return false;
1667 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1668 gcc_assert (vectorization_factor != 0);
1670 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1671 dump_printf_loc (MSG_NOTE, vect_location,
1672 "vectorization_factor = %d, niters = "
1673 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1674 LOOP_VINFO_INT_NITERS (loop_vinfo));
1676 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1677 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1678 || ((max_niter = max_stmt_executions_int (loop)) != -1
1679 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1681 if (dump_enabled_p ())
1682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1683 "not vectorized: iteration count too small.\n");
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686 "not vectorized: iteration count smaller than "
1687 "vectorization factor.\n");
1688 return false;
1691 /* Analyze cost. Decide if worth while to vectorize. */
1693 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1694 &min_profitable_estimate);
1695 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1697 if (min_profitable_iters < 0)
1699 if (dump_enabled_p ())
1700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1701 "not vectorized: vectorization not profitable.\n");
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704 "not vectorized: vector version will never be "
1705 "profitable.\n");
1706 return false;
1709 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1710 * vectorization_factor) - 1);
1713 /* Use the cost model only if it is more conservative than the
1714 user-specified threshold. */
1716 th = (unsigned) min_scalar_loop_bound;
1717 if (min_profitable_iters
1718 && (!min_scalar_loop_bound
1719 || min_profitable_iters > min_scalar_loop_bound))
1720 th = (unsigned) min_profitable_iters;
1722 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1724 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1725 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1727 if (dump_enabled_p ())
1728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1729 "not vectorized: vectorization not profitable.\n");
1730 if (dump_enabled_p ())
1731 dump_printf_loc (MSG_NOTE, vect_location,
1732 "not vectorized: iteration count smaller than user "
1733 "specified loop bound parameter or minimum profitable "
1734 "iterations (whichever is more conservative).\n");
1735 return false;
1738 if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1739 && ((unsigned HOST_WIDE_INT) estimated_niter
1740 <= MAX (th, (unsigned)min_profitable_estimate)))
1742 if (dump_enabled_p ())
1743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1744 "not vectorized: estimated iteration count too "
1745 "small.\n");
1746 if (dump_enabled_p ())
1747 dump_printf_loc (MSG_NOTE, vect_location,
1748 "not vectorized: estimated iteration count smaller "
1749 "than specified loop bound parameter or minimum "
1750 "profitable iterations (whichever is more "
1751 "conservative).\n");
1752 return false;
1755 return true;
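/* Worked example (hypothetical numbers): with a vectorization factor
   of 4, a cost-model answer of min_profitable_iters = 10 and a
   min_scalar_loop_bound of 7 (derived from --param min-vect-loop-bound
   and the VF), the threshold TH is the more conservative value 10, so
   a loop known to execute only 8 iterations is rejected above as not
   profitable to vectorize.  */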
1759 /* Function vect_analyze_loop_2.
1761 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1762 for it. The different analyses will record information in the
1763 loop_vec_info struct. */
1764 static bool
1765 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1767 bool ok;
1768 int max_vf = MAX_VECTORIZATION_FACTOR;
1769 int min_vf = 2;
1770 unsigned int th;
1771 unsigned int n_stmts = 0;
1773 /* Find all data references in the loop (which correspond to vdefs/vuses)
1774 and analyze their evolution in the loop. Also adjust the minimal
1775 vectorization factor according to the loads and stores.
1777 FORNOW: Handle only simple array references whose
1778 alignment can be forced, and aligned pointer references. */
1780 ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf, &n_stmts);
1781 if (!ok)
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1785 "bad data references.\n");
1786 return false;
1789 /* Classify all cross-iteration scalar data-flow cycles.
1790 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1792 vect_analyze_scalar_cycles (loop_vinfo);
1794 vect_pattern_recog (loop_vinfo, NULL);
1796 vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1798 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1799 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1801 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1802 if (!ok)
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "bad data access.\n");
1807 return false;
1810 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1812 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1813 if (!ok)
1815 if (dump_enabled_p ())
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1817 "unexpected pattern.\n");
1818 return false;
1821 /* Analyze data dependences between the data-refs in the loop
1822 and adjust the maximum vectorization factor according to
1823 the dependences.
1824 FORNOW: fail at the first data dependence that we encounter. */
1826 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1827 if (!ok
1828 || max_vf < min_vf)
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "bad data dependence.\n");
1833 return false;
1836 ok = vect_determine_vectorization_factor (loop_vinfo);
1837 if (!ok)
1839 if (dump_enabled_p ())
1840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1841 "can't determine vectorization factor.\n");
1842 return false;
1844 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1846 if (dump_enabled_p ())
1847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848 "bad data dependence.\n");
1849 return false;
1852 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1853 ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
1854 if (!ok)
1855 return false;
1857 /* If there are any SLP instances mark them as pure_slp. */
1858 bool slp = vect_make_slp_decision (loop_vinfo);
1859 if (slp)
1861 /* Find stmts that need to be both vectorized and SLPed. */
1862 vect_detect_hybrid_slp (loop_vinfo);
1864 /* Update the vectorization factor based on the SLP decision. */
1865 vect_update_vf_for_slp (loop_vinfo);
1868 /* Analyze the alignment of the data-refs in the loop.
1869 Fail if a data reference is found that cannot be vectorized. */
1871 ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1872 if (!ok)
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1876 "bad data alignment.\n");
1877 return false;
1880 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1881 It is important to call pruning after vect_analyze_data_ref_accesses,
1882 since we use grouping information gathered by interleaving analysis. */
1883 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1884 if (!ok)
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "number of versioning for alias "
1889 "run-time tests exceeds %d "
1890 "(--param vect-max-version-for-alias-checks)\n",
1891 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1892 return false;
1895 /* Compute the scalar iteration cost. */
1896 vect_get_single_scalar_iteration_cost (loop_vinfo);
1898 /* This pass will decide on using loop versioning and/or loop peeling in
1899 order to enhance the alignment of data references in the loop. */
1901 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1902 if (!ok)
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1906 "bad data alignment.\n");
1907 return false;
1910 if (slp)
1912 /* Analyze operations in the SLP instances. Note this may
1913 remove unsupported SLP instances which makes the above
1914 SLP kind detection invalid. */
1915 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1916 vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1917 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1918 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1919 return false;
1922 /* Scan all the remaining operations in the loop that are not subject
1923 to SLP and make sure they are vectorizable. */
1924 ok = vect_analyze_loop_operations (loop_vinfo);
1925 if (!ok)
1927 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1929 "bad operation or unsupported loop bound.\n");
1930 return false;
1933 /* Decide whether we need to create an epilogue loop to handle
1934 remaining scalar iterations. */
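/* For instance (an illustrative case, assuming no run-time versioning is
   needed): with VF = 4, no peeling for alignment and a known trip count of
   10, tree_ctz (10) = 1 is smaller than log2 (VF) = 2, so
   LOOP_VINFO_PEELING_FOR_NITER is set and the epilogue loop executes the
   remaining 10 % 4 = 2 scalar iterations.  */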
1935 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
1936 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1937 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1939 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1940 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1942 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
1943 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1944 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1945 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1947 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1948 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1949 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1950 /* In case of versioning, check if the maximum number of
1951 iterations is greater than th. If they are identical,
1952 the epilogue is unnecessary. */
1953 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1954 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1955 || (unsigned HOST_WIDE_INT)max_stmt_executions_int
1956 (LOOP_VINFO_LOOP (loop_vinfo)) > th)))
1957 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1959 /* If an epilogue loop is required make sure we can create one. */
1960 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1961 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
1963 if (dump_enabled_p ())
1964 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
1965 if (!vect_can_advance_ivs_p (loop_vinfo)
1966 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
1967 single_exit (LOOP_VINFO_LOOP
1968 (loop_vinfo))))
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "not vectorized: can't create required "
1973 "epilog loop\n");
1974 return false;
1978 return true;
1981 /* Function vect_analyze_loop.
1983 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1984 for it. The different analyses will record information in the
1985 loop_vec_info struct. */
1986 loop_vec_info
1987 vect_analyze_loop (struct loop *loop)
1989 loop_vec_info loop_vinfo;
1990 unsigned int vector_sizes;
1992 /* Autodetect first vector size we try. */
1993 current_vector_size = 0;
1994 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1996 if (dump_enabled_p ())
1997 dump_printf_loc (MSG_NOTE, vect_location,
1998 "===== analyze_loop_nest =====\n");
2000 if (loop_outer (loop)
2001 && loop_vec_info_for_loop (loop_outer (loop))
2002 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2004 if (dump_enabled_p ())
2005 dump_printf_loc (MSG_NOTE, vect_location,
2006 "outer-loop already vectorized.\n");
2007 return NULL;
2010 while (1)
2012 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2013 loop_vinfo = vect_analyze_loop_form (loop);
2014 if (!loop_vinfo)
2016 if (dump_enabled_p ())
2017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2018 "bad loop form.\n");
2019 return NULL;
2022 if (vect_analyze_loop_2 (loop_vinfo))
2024 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2026 return loop_vinfo;
2029 destroy_loop_vec_info (loop_vinfo, true);
2031 vector_sizes &= ~current_vector_size;
2032 if (vector_sizes == 0
2033 || current_vector_size == 0)
2034 return NULL;
2036 /* Try the next biggest vector size. */
2037 current_vector_size = 1 << floor_log2 (vector_sizes);
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_NOTE, vect_location,
2040 "***** Re-trying analysis with "
2041 "vector size %d\n", current_vector_size);
2046 /* Function reduction_code_for_scalar_code
2048 Input:
2049 CODE - tree_code of a reduction operation.
2051 Output:
2052 REDUC_CODE - the corresponding tree-code to be used to reduce the
2053 vector of partial results into a single scalar result, or ERROR_MARK
2054 if the operation is a supported reduction operation, but does not have
2055 such a tree-code.
2057 Return FALSE if CODE currently cannot be vectorized as a reduction. */
2059 static bool
2060 reduction_code_for_scalar_code (enum tree_code code,
2061 enum tree_code *reduc_code)
2063 switch (code)
2065 case MAX_EXPR:
2066 *reduc_code = REDUC_MAX_EXPR;
2067 return true;
2069 case MIN_EXPR:
2070 *reduc_code = REDUC_MIN_EXPR;
2071 return true;
2073 case PLUS_EXPR:
2074 *reduc_code = REDUC_PLUS_EXPR;
2075 return true;
2077 case MULT_EXPR:
2078 case MINUS_EXPR:
2079 case BIT_IOR_EXPR:
2080 case BIT_XOR_EXPR:
2081 case BIT_AND_EXPR:
2082 *reduc_code = ERROR_MARK;
2083 return true;
2085 default:
2086 return false;
2091 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2092 STMT is printed with a message MSG. */
2094 static void
2095 report_vect_op (int msg_type, gimple stmt, const char *msg)
2097 dump_printf_loc (msg_type, vect_location, "%s", msg);
2098 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2099 dump_printf (msg_type, "\n");
2103 /* Detect SLP reduction of the form:
2105 #a1 = phi <a5, a0>
2106 a2 = operation (a1)
2107 a3 = operation (a2)
2108 a4 = operation (a3)
2109 a5 = operation (a4)
2111 #a = phi <a5>
2113 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2114 FIRST_STMT is the first reduction stmt in the chain
2115 (a2 = operation (a1)).
2117 Return TRUE if a reduction chain was detected. */
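/* As an illustrative sketch (names are placeholders), a manually unrolled
   accumulation such as
     for (i = 0; i < n; i += 2)
       { sum += a[i]; sum += a[i + 1]; }
   produces a chain of two statements between the reduction phi and its
   latch argument, which is the kind of chain detected here.  */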
2119 static bool
2120 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
2122 struct loop *loop = (gimple_bb (phi))->loop_father;
2123 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2124 enum tree_code code;
2125 gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
2126 stmt_vec_info use_stmt_info, current_stmt_info;
2127 tree lhs;
2128 imm_use_iterator imm_iter;
2129 use_operand_p use_p;
2130 int nloop_uses, size = 0, n_out_of_loop_uses;
2131 bool found = false;
2133 if (loop != vect_loop)
2134 return false;
2136 lhs = PHI_RESULT (phi);
2137 code = gimple_assign_rhs_code (first_stmt);
2138 while (1)
2140 nloop_uses = 0;
2141 n_out_of_loop_uses = 0;
2142 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2144 gimple use_stmt = USE_STMT (use_p);
2145 if (is_gimple_debug (use_stmt))
2146 continue;
2148 /* Check if we got back to the reduction phi. */
2149 if (use_stmt == phi)
2151 loop_use_stmt = use_stmt;
2152 found = true;
2153 break;
2156 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2158 loop_use_stmt = use_stmt;
2159 nloop_uses++;
2161 else
2162 n_out_of_loop_uses++;
2164 /* There can be either a single use in the loop or two uses in
2165 phi nodes. */
2166 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2167 return false;
2170 if (found)
2171 break;
2173 /* We reached a statement with no loop uses. */
2174 if (nloop_uses == 0)
2175 return false;
2177 /* This is a loop exit phi, and we haven't reached the reduction phi. */
2178 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2179 return false;
2181 if (!is_gimple_assign (loop_use_stmt)
2182 || code != gimple_assign_rhs_code (loop_use_stmt)
2183 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2184 return false;
2186 /* Insert USE_STMT into reduction chain. */
2187 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2188 if (current_stmt)
2190 current_stmt_info = vinfo_for_stmt (current_stmt);
2191 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2192 GROUP_FIRST_ELEMENT (use_stmt_info)
2193 = GROUP_FIRST_ELEMENT (current_stmt_info);
2195 else
2196 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2198 lhs = gimple_assign_lhs (loop_use_stmt);
2199 current_stmt = loop_use_stmt;
2200 size++;
2203 if (!found || loop_use_stmt != phi || size < 2)
2204 return false;
2206 /* Swap the operands, if needed, so that the reduction operand is the second
2207 operand. */
2208 lhs = PHI_RESULT (phi);
2209 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2210 while (next_stmt)
2212 if (gimple_assign_rhs2 (next_stmt) == lhs)
2214 tree op = gimple_assign_rhs1 (next_stmt);
2215 gimple def_stmt = NULL;
2217 if (TREE_CODE (op) == SSA_NAME)
2218 def_stmt = SSA_NAME_DEF_STMT (op);
2220 /* Check that the other def is either defined in the loop
2221 ("vect_internal_def"), or it's an induction (defined by a
2222 loop-header phi-node). */
2223 if (def_stmt
2224 && gimple_bb (def_stmt)
2225 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2226 && (is_gimple_assign (def_stmt)
2227 || is_gimple_call (def_stmt)
2228 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2229 == vect_induction_def
2230 || (gimple_code (def_stmt) == GIMPLE_PHI
2231 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2232 == vect_internal_def
2233 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2235 lhs = gimple_assign_lhs (next_stmt);
2236 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2237 continue;
2240 return false;
2242 else
2244 tree op = gimple_assign_rhs2 (next_stmt);
2245 gimple def_stmt = NULL;
2247 if (TREE_CODE (op) == SSA_NAME)
2248 def_stmt = SSA_NAME_DEF_STMT (op);
2250 /* Check that the other def is either defined in the loop
2251 ("vect_internal_def"), or it's an induction (defined by a
2252 loop-header phi-node). */
2253 if (def_stmt
2254 && gimple_bb (def_stmt)
2255 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2256 && (is_gimple_assign (def_stmt)
2257 || is_gimple_call (def_stmt)
2258 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2259 == vect_induction_def
2260 || (gimple_code (def_stmt) == GIMPLE_PHI
2261 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2262 == vect_internal_def
2263 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2265 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2269 dump_printf (MSG_NOTE, "\n");
2272 swap_ssa_operands (next_stmt,
2273 gimple_assign_rhs1_ptr (next_stmt),
2274 gimple_assign_rhs2_ptr (next_stmt));
2275 update_stmt (next_stmt);
2277 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2278 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2280 else
2281 return false;
2284 lhs = gimple_assign_lhs (next_stmt);
2285 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2288 /* Save the chain for further analysis in SLP detection. */
2289 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2290 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2291 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2293 return true;
2297 /* Function vect_is_simple_reduction_1
2299 (1) Detect a cross-iteration def-use cycle that represents a simple
2300 reduction computation. We look for the following pattern:
2302 loop_header:
2303 a1 = phi < a0, a2 >
2304 a3 = ...
2305 a2 = operation (a3, a1)
or
2309 a3 = ...
2310 loop_header:
2311 a1 = phi < a0, a2 >
2312 a2 = operation (a3, a1)
2314 such that:
2315 1. operation is commutative and associative and it is safe to
2316 change the order of the computation (if CHECK_REDUCTION is true)
2317 2. no uses for a2 in the loop (a2 is used out of the loop)
2318 3. no uses of a1 in the loop besides the reduction operation
2319 4. no uses of a1 outside the loop.
2321 Conditions 1,4 are tested here.
2322 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2324 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2325 nested cycles, if CHECK_REDUCTION is false.
2327 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2328 reductions:
2330 a1 = phi < a0, a2 >
2331 inner loop (def of a3)
2332 a2 = phi < a3 >
2334 If MODIFY is true it also tries to rework the code in-place to enable
2335 detection of more reduction patterns. For the time being we rewrite
2336 "res -= RHS" into "res += -RHS" when it seems worthwhile. */
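/* As an illustrative example (placeholder names), the loop
     for (i = 0; i < n; i++)
       sum += a[i];
   exposes the cycle
     sum_1 = phi <sum_0, sum_2>
     sum_2 = a[i] + sum_1
   which matches pattern (1) with "operation" being a PLUS_EXPR.  */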
2339 static gimple
2340 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2341 bool check_reduction, bool *double_reduc,
2342 bool modify, bool need_wrapping_integral_overflow)
2344 struct loop *loop = (gimple_bb (phi))->loop_father;
2345 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2346 edge latch_e = loop_latch_edge (loop);
2347 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2348 gimple def_stmt, def1 = NULL, def2 = NULL;
2349 enum tree_code orig_code, code;
2350 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2351 tree type;
2352 int nloop_uses;
2353 tree name;
2354 imm_use_iterator imm_iter;
2355 use_operand_p use_p;
2356 bool phi_def;
2358 *double_reduc = false;
2360 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization;
2361 otherwise, we assume outer loop vectorization. */
2362 gcc_assert ((check_reduction && loop == vect_loop)
2363 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2365 name = PHI_RESULT (phi);
2366 /* ??? If there are no uses of the PHI result the inner loop reduction
2367 won't be detected as possibly double-reduction by vectorizable_reduction
2368 because that tries to walk the PHI arg from the preheader edge which
2369 can be constant. See PR60382. */
2370 if (has_zero_uses (name))
2371 return NULL;
2372 nloop_uses = 0;
2373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2375 gimple use_stmt = USE_STMT (use_p);
2376 if (is_gimple_debug (use_stmt))
2377 continue;
2379 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2381 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2383 "intermediate value used outside loop.\n");
2385 return NULL;
2388 nloop_uses++;
2389 if (nloop_uses > 1)
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2393 "reduction used in loop.\n");
2394 return NULL;
2398 if (TREE_CODE (loop_arg) != SSA_NAME)
2400 if (dump_enabled_p ())
2402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2403 "reduction: not ssa_name: ");
2404 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2405 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2407 return NULL;
2410 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2411 if (!def_stmt)
2413 if (dump_enabled_p ())
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "reduction: no def_stmt.\n");
2416 return NULL;
2419 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2421 if (dump_enabled_p ())
2423 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2424 dump_printf (MSG_NOTE, "\n");
2426 return NULL;
2429 if (is_gimple_assign (def_stmt))
2431 name = gimple_assign_lhs (def_stmt);
2432 phi_def = false;
2434 else
2436 name = PHI_RESULT (def_stmt);
2437 phi_def = true;
2440 nloop_uses = 0;
2441 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2443 gimple use_stmt = USE_STMT (use_p);
2444 if (is_gimple_debug (use_stmt))
2445 continue;
2446 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2447 nloop_uses++;
2448 if (nloop_uses > 1)
2450 if (dump_enabled_p ())
2451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2452 "reduction used in loop.\n");
2453 return NULL;
2457 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2458 defined in the inner loop. */
2459 if (phi_def)
2461 op1 = PHI_ARG_DEF (def_stmt, 0);
2463 if (gimple_phi_num_args (def_stmt) != 1
2464 || TREE_CODE (op1) != SSA_NAME)
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "unsupported phi node definition.\n");
2470 return NULL;
2473 def1 = SSA_NAME_DEF_STMT (op1);
2474 if (gimple_bb (def1)
2475 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2476 && loop->inner
2477 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2478 && is_gimple_assign (def1))
2480 if (dump_enabled_p ())
2481 report_vect_op (MSG_NOTE, def_stmt,
2482 "detected double reduction: ");
2484 *double_reduc = true;
2485 return def_stmt;
2488 return NULL;
2491 code = orig_code = gimple_assign_rhs_code (def_stmt);
2493 /* We can handle "res -= x[i]", which is non-associative, by
2494 simply rewriting it into "res += -x[i]". Avoid changing the
2495 gimple instruction for the first simple tests and only do this
2496 if we're allowed to change the code at all. */
2497 if (code == MINUS_EXPR
2498 && modify
2499 && (op1 = gimple_assign_rhs1 (def_stmt))
2500 && TREE_CODE (op1) == SSA_NAME
2501 && SSA_NAME_DEF_STMT (op1) == phi)
2502 code = PLUS_EXPR;
2504 if (check_reduction
2505 && (!commutative_tree_code (code) || !associative_tree_code (code)))
2507 if (dump_enabled_p ())
2508 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2509 "reduction: not commutative/associative: ");
2510 return NULL;
2513 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2515 if (code != COND_EXPR)
2517 if (dump_enabled_p ())
2518 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2519 "reduction: not binary operation: ");
2521 return NULL;
2524 op3 = gimple_assign_rhs1 (def_stmt);
2525 if (COMPARISON_CLASS_P (op3))
2527 op4 = TREE_OPERAND (op3, 1);
2528 op3 = TREE_OPERAND (op3, 0);
2531 op1 = gimple_assign_rhs2 (def_stmt);
2532 op2 = gimple_assign_rhs3 (def_stmt);
2534 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2536 if (dump_enabled_p ())
2537 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2538 "reduction: uses not ssa_names: ");
2540 return NULL;
2543 else
2545 op1 = gimple_assign_rhs1 (def_stmt);
2546 op2 = gimple_assign_rhs2 (def_stmt);
2548 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2550 if (dump_enabled_p ())
2551 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2552 "reduction: uses not ssa_names: ");
2554 return NULL;
2558 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2559 if ((TREE_CODE (op1) == SSA_NAME
2560 && !types_compatible_p (type,TREE_TYPE (op1)))
2561 || (TREE_CODE (op2) == SSA_NAME
2562 && !types_compatible_p (type, TREE_TYPE (op2)))
2563 || (op3 && TREE_CODE (op3) == SSA_NAME
2564 && !types_compatible_p (type, TREE_TYPE (op3)))
2565 || (op4 && TREE_CODE (op4) == SSA_NAME
2566 && !types_compatible_p (type, TREE_TYPE (op4))))
2568 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_NOTE, vect_location,
2571 "reduction: multiple types: operation type: ");
2572 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2573 dump_printf (MSG_NOTE, ", operands types: ");
2574 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2575 TREE_TYPE (op1));
2576 dump_printf (MSG_NOTE, ",");
2577 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2578 TREE_TYPE (op2));
2579 if (op3)
2581 dump_printf (MSG_NOTE, ",");
2582 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2583 TREE_TYPE (op3));
2586 if (op4)
2588 dump_printf (MSG_NOTE, ",");
2589 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2590 TREE_TYPE (op4));
2592 dump_printf (MSG_NOTE, "\n");
2595 return NULL;
2598 /* Check that it's ok to change the order of the computation.
2599 Generally, when vectorizing a reduction we change the order of the
2600 computation. This may change the behavior of the program in some
2601 cases, so we need to check that this is ok. One exception is when
2602 vectorizing an outer-loop: the inner-loop is executed sequentially,
2603 and therefore vectorizing reductions in the inner-loop during
2604 outer-loop vectorization is safe. */
2606 /* CHECKME: check for !flag_finite_math_only too? */
2607 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2608 && check_reduction)
2610 /* Changing the order of operations changes the semantics. */
2611 if (dump_enabled_p ())
2612 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2613 "reduction: unsafe fp math optimization: ");
2614 return NULL;
2616 else if (INTEGRAL_TYPE_P (type) && check_reduction)
2618 if (TYPE_OVERFLOW_TRAPS (type))
2620 /* Changing the order of operations changes the semantics. */
2621 if (dump_enabled_p ())
2622 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2623 "reduction: unsafe int math optimization"
2624 " (overflow traps): ");
2625 return NULL;
2627 if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
2629 /* Changing the order of operations changes the semantics. */
2630 if (dump_enabled_p ())
2631 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2632 "reduction: unsafe int math optimization"
2633 " (overflow doesn't wrap): ");
2634 return NULL;
2637 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2639 /* Changing the order of operations changes the semantics. */
2640 if (dump_enabled_p ())
2641 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2642 "reduction: unsafe fixed-point math optimization: ");
2643 return NULL;
2646 /* If we detected "res -= x[i]" earlier, rewrite it into
2647 "res += -x[i]" now. If this turns out to be useless reassoc
2648 will clean it up again. */
2649 if (orig_code == MINUS_EXPR)
2651 tree rhs = gimple_assign_rhs2 (def_stmt);
2652 tree negrhs = make_ssa_name (TREE_TYPE (rhs));
2653 gimple negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
2654 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2655 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2656 loop_info, NULL));
2657 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2658 gimple_assign_set_rhs2 (def_stmt, negrhs);
2659 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2660 update_stmt (def_stmt);
2663 /* Reduction is safe. We're dealing with one of the following:
2664 1) integer arithmetic and no trapv
2665 2) floating point arithmetic, and special flags permit this optimization
2666 3) nested cycle (i.e., outer loop vectorization). */
2667 if (TREE_CODE (op1) == SSA_NAME)
2668 def1 = SSA_NAME_DEF_STMT (op1);
2670 if (TREE_CODE (op2) == SSA_NAME)
2671 def2 = SSA_NAME_DEF_STMT (op2);
2673 if (code != COND_EXPR
2674 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2676 if (dump_enabled_p ())
2677 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2678 return NULL;
2681 /* Check that one def is the reduction def, defined by PHI,
2682 the other def is either defined in the loop ("vect_internal_def"),
2683 or it's an induction (defined by a loop-header phi-node). */
2685 if (def2 && def2 == phi
2686 && (code == COND_EXPR
2687 || !def1 || gimple_nop_p (def1)
2688 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2689 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2690 && (is_gimple_assign (def1)
2691 || is_gimple_call (def1)
2692 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2693 == vect_induction_def
2694 || (gimple_code (def1) == GIMPLE_PHI
2695 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2696 == vect_internal_def
2697 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2699 if (dump_enabled_p ())
2700 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2701 return def_stmt;
2704 if (def1 && def1 == phi
2705 && (code == COND_EXPR
2706 || !def2 || gimple_nop_p (def2)
2707 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2708 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2709 && (is_gimple_assign (def2)
2710 || is_gimple_call (def2)
2711 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2712 == vect_induction_def
2713 || (gimple_code (def2) == GIMPLE_PHI
2714 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2715 == vect_internal_def
2716 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2718 if (check_reduction)
2720 /* Swap operands (just for simplicity - so that the rest of the code
2721 can assume that the reduction variable is always the last (second)
2722 argument). */
2723 if (dump_enabled_p ())
2724 report_vect_op (MSG_NOTE, def_stmt,
2725 "detected reduction: need to swap operands: ");
2727 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2728 gimple_assign_rhs2_ptr (def_stmt));
2730 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2731 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2733 else
2735 if (dump_enabled_p ())
2736 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2739 return def_stmt;
2742 /* Try to find SLP reduction chain. */
2743 if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2745 if (dump_enabled_p ())
2746 report_vect_op (MSG_NOTE, def_stmt,
2747 "reduction: detected reduction chain: ");
2749 return def_stmt;
2752 if (dump_enabled_p ())
2753 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2754 "reduction: unknown pattern: ");
2756 return NULL;
2759 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2760 in-place. Arguments as there. */
2762 static gimple
2763 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2764 bool check_reduction, bool *double_reduc,
2765 bool need_wrapping_integral_overflow)
2767 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2768 double_reduc, false,
2769 need_wrapping_integral_overflow);
2772 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2773 in-place if it enables detection of more reductions. Arguments
2774 as there. */
2776 gimple
2777 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2778 bool check_reduction, bool *double_reduc,
2779 bool need_wrapping_integral_overflow)
2781 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2782 double_reduc, true,
2783 need_wrapping_integral_overflow);
2786 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
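/* For example (illustrative numbers only): with VF = 8, a prologue of 3
   peeled iterations and 20 known scalar iterations, the epilogue is charged
   (20 - 3) % 8 = 1 iteration, and the scalar per-iteration costs are scaled
   by those peel counts.  */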
int
2788 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2789 int *peel_iters_epilogue,
2790 stmt_vector_for_cost *scalar_cost_vec,
2791 stmt_vector_for_cost *prologue_cost_vec,
2792 stmt_vector_for_cost *epilogue_cost_vec)
2794 int retval = 0;
2795 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2797 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2799 *peel_iters_epilogue = vf/2;
2800 if (dump_enabled_p ())
2801 dump_printf_loc (MSG_NOTE, vect_location,
2802 "cost model: epilogue peel iters set to vf/2 "
2803 "because loop iterations are unknown.\n");
2805 /* If peeled iterations are known but the number of scalar loop
2806 iterations is unknown, count a taken branch per peeled loop. */
2807 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2808 NULL, 0, vect_prologue);
2809 retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2810 NULL, 0, vect_epilogue);
2812 else
2814 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2815 peel_iters_prologue = niters < peel_iters_prologue ?
2816 niters : peel_iters_prologue;
2817 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2818 /* If we need to peel for gaps, but no epilogue peeling turned out to be
2819 required, we still have to peel VF iterations. */
2820 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2821 *peel_iters_epilogue = vf;
2824 stmt_info_for_cost *si;
2825 int j;
2826 if (peel_iters_prologue)
2827 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2828 retval += record_stmt_cost (prologue_cost_vec,
2829 si->count * peel_iters_prologue,
2830 si->kind, NULL, si->misalign,
2831 vect_prologue);
2832 if (*peel_iters_epilogue)
2833 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2834 retval += record_stmt_cost (epilogue_cost_vec,
2835 si->count * *peel_iters_epilogue,
2836 si->kind, NULL, si->misalign,
2837 vect_epilogue);
2839 return retval;
2842 /* Function vect_estimate_min_profitable_iters
2844 Return the number of iterations required for the vector version of the
2845 loop to be profitable relative to the cost of the scalar version of the
2846 loop. */
2848 static void
2849 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2850 int *ret_min_profitable_niters,
2851 int *ret_min_profitable_estimate)
2853 int min_profitable_iters;
2854 int min_profitable_estimate;
2855 int peel_iters_prologue;
2856 int peel_iters_epilogue;
2857 unsigned vec_inside_cost = 0;
2858 int vec_outside_cost = 0;
2859 unsigned vec_prologue_cost = 0;
2860 unsigned vec_epilogue_cost = 0;
2861 int scalar_single_iter_cost = 0;
2862 int scalar_outside_cost = 0;
2863 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2864 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2865 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2867 /* Cost model disabled. */
2868 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2870 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2871 *ret_min_profitable_niters = 0;
2872 *ret_min_profitable_estimate = 0;
2873 return;
2876 /* Requires loop versioning tests to handle misalignment. */
2877 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2879 /* FIXME: Make cost depend on complexity of individual check. */
2880 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2881 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2882 vect_prologue);
2883 dump_printf (MSG_NOTE,
2884 "cost model: Adding cost of checks for loop "
2885 "versioning to treat misalignment.\n");
2888 /* Requires loop versioning with alias checks. */
2889 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2891 /* FIXME: Make cost depend on complexity of individual check. */
2892 unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2893 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2894 vect_prologue);
2895 dump_printf (MSG_NOTE,
2896 "cost model: Adding cost of checks for loop "
2897 "versioning for aliasing.\n");
2900 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2901 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2902 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2903 vect_prologue);
2905 /* Count statements in scalar loop. Using this as scalar cost for a single
2906 iteration for now.
2908 TODO: Add outer loop support.
2910 TODO: Consider assigning different costs to different scalar
2911 statements. */
2913 scalar_single_iter_cost
2914 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
2916 /* Add additional cost for the peeled instructions in prologue and epilogue
2917 loop.
2919 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2920 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2922 TODO: Build an expression that represents peel_iters for prologue and
2923 epilogue to be used in a run-time test. */
2925 if (npeel < 0)
2927 peel_iters_prologue = vf/2;
2928 dump_printf (MSG_NOTE, "cost model: "
2929 "prologue peel iters set to vf/2.\n");
2931 /* If peeling for alignment is unknown, loop bound of main loop becomes
2932 unknown. */
2933 peel_iters_epilogue = vf/2;
2934 dump_printf (MSG_NOTE, "cost model: "
2935 "epilogue peel iters set to vf/2 because "
2936 "peeling for alignment is unknown.\n");
2938 /* If peeled iterations are unknown, count a taken branch and a not taken
2939 branch per peeled loop. Even if scalar loop iterations are known,
2940 vector iterations are not known since peeled prologue iterations are
2941 not known. Hence guards remain the same. */
2942 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2943 NULL, 0, vect_prologue);
2944 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2945 NULL, 0, vect_prologue);
2946 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2947 NULL, 0, vect_epilogue);
2948 (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2949 NULL, 0, vect_epilogue);
2950 stmt_info_for_cost *si;
2951 int j;
2952 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
2954 struct _stmt_vec_info *stmt_info
2955 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2956 (void) add_stmt_cost (target_cost_data,
2957 si->count * peel_iters_prologue,
2958 si->kind, stmt_info, si->misalign,
2959 vect_prologue);
2960 (void) add_stmt_cost (target_cost_data,
2961 si->count * peel_iters_epilogue,
2962 si->kind, stmt_info, si->misalign,
2963 vect_epilogue);
2966 else
2968 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2969 stmt_info_for_cost *si;
2970 int j;
2971 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2973 prologue_cost_vec.create (2);
2974 epilogue_cost_vec.create (2);
2975 peel_iters_prologue = npeel;
2977 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2978 &peel_iters_epilogue,
2979 &LOOP_VINFO_SCALAR_ITERATION_COST
2980 (loop_vinfo),
2981 &prologue_cost_vec,
2982 &epilogue_cost_vec);
2984 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2986 struct _stmt_vec_info *stmt_info
2987 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2988 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2989 si->misalign, vect_prologue);
2992 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2994 struct _stmt_vec_info *stmt_info
2995 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2996 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2997 si->misalign, vect_epilogue);
3000 prologue_cost_vec.release ();
3001 epilogue_cost_vec.release ();
3004 /* FORNOW: The scalar outside cost is incremented in one of the
3005 following ways:
3007 1. The vectorizer checks for alignment and aliasing and generates
3008 a condition that allows dynamic vectorization. A cost model
3009 check is ANDED with the versioning condition. Hence scalar code
3010 path now has the added cost of the versioning check.
3012 if (cost > th & versioning_check)
3013 jmp to vector code
3015 Hence run-time scalar is incremented by not-taken branch cost.
3017 2. The vectorizer then checks if a prologue is required. If the
3018 cost model check was not done before during versioning, it has to
3019 be done before the prologue check.
3021 if (cost <= th)
3022 prologue = scalar_iters
3023 if (prologue == 0)
3024 jmp to vector code
3025 else
3026 execute prologue
3027 if (prologue == num_iters)
3028 go to exit
3030 Hence the run-time scalar cost is incremented by a taken branch,
3031 plus a not-taken branch, plus a taken branch cost.
3033 3. The vectorizer then checks if an epilogue is required. If the
3034 cost model check was not done before during prologue check, it
3035 has to be done with the epilogue check.
3037 if (prologue == 0)
3038 jmp to vector code
3039 else
3040 execute prologue
3041 if (prologue == num_iters)
3042 go to exit
3043 vector code:
3044 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3045 jmp to epilogue
3047 Hence the run-time scalar cost should be incremented by 2 taken
3048 branches.
3050 TODO: The back end may reorder the BBs differently and reverse
3051 conditions/branch directions. Change the estimates below to
3052 something more reasonable. */
3054 /* If the number of iterations is known and we do not do versioning, we can
3055 decide whether to vectorize at compile time. Hence the scalar version
3056 does not carry cost model guard costs. */
3057 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3058 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3059 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3061 /* Cost model check occurs at versioning. */
3062 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3063 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3064 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3065 else
3067 /* Cost model check occurs at prologue generation. */
3068 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3069 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3070 + vect_get_stmt_cost (cond_branch_not_taken);
3071 /* Cost model check occurs at epilogue generation. */
3072 else
3073 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3077 /* Complete the target-specific cost calculations. */
3078 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3079 &vec_inside_cost, &vec_epilogue_cost);
3081 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3083 if (dump_enabled_p ())
3085 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3086 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3087 vec_inside_cost);
3088 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3089 vec_prologue_cost);
3090 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3091 vec_epilogue_cost);
3092 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3093 scalar_single_iter_cost);
3094 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3095 scalar_outside_cost);
3096 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3097 vec_outside_cost);
3098 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3099 peel_iters_prologue);
3100 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3101 peel_iters_epilogue);
3104 /* Calculate number of iterations required to make the vector version
3105 profitable, relative to the loop bodies only. The following condition
3106 must hold true:
3107 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3108 where
3109 SIC = scalar iteration cost, VIC = vector iteration cost,
3110 VOC = vector outside cost, VF = vectorization factor,
3111 PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
3112 SOC = scalar outside cost for run time cost model check. */
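/* As a purely illustrative numeric example: with SIC = 1, VIC = 2, VF = 4,
   VOC = 10, SOC = 0 and no peeling, the condition reads
   niters > 2 * (niters / 4) + 10, so the two versions break even at about
   20 iterations and the vector loop only pays off for larger trip counts.  */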
3114 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3116 if (vec_outside_cost <= 0)
3117 min_profitable_iters = 1;
3118 else
3120 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3121 - vec_inside_cost * peel_iters_prologue
3122 - vec_inside_cost * peel_iters_epilogue)
3123 / ((scalar_single_iter_cost * vf)
3124 - vec_inside_cost);
3126 if ((scalar_single_iter_cost * vf * min_profitable_iters)
3127 <= (((int) vec_inside_cost * min_profitable_iters)
3128 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3129 min_profitable_iters++;
3132 /* vector version will never be profitable. */
3133 else
3135 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3136 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3137 "did not happen for a simd loop");
3139 if (dump_enabled_p ())
3140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3141 "cost model: the vector iteration cost = %d "
3142 "divided by the scalar iteration cost = %d "
3143 "is greater than or equal to the vectorization factor = %d"
3144 ".\n",
3145 vec_inside_cost, scalar_single_iter_cost, vf);
3146 *ret_min_profitable_niters = -1;
3147 *ret_min_profitable_estimate = -1;
3148 return;
3151 dump_printf (MSG_NOTE,
3152 " Calculated minimum iters for profitability: %d\n",
3153 min_profitable_iters);
3155 min_profitable_iters =
3156 min_profitable_iters < vf ? vf : min_profitable_iters;
3158 /* Because the condition we create is:
3159 if (niters <= min_profitable_iters)
3160 then skip the vectorized loop. */
3161 min_profitable_iters--;
3163 if (dump_enabled_p ())
3164 dump_printf_loc (MSG_NOTE, vect_location,
3165 " Runtime profitability threshold = %d\n",
3166 min_profitable_iters);
3168 *ret_min_profitable_niters = min_profitable_iters;
3170 /* Calculate number of iterations required to make the vector version
3171 profitable, relative to the loop bodies only.
3173 The non-vectorized variant costs SIC * niters and must win over the vector
3174 variant on the expected loop trip count. The following condition must hold true:
3175 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3177 if (vec_outside_cost <= 0)
3178 min_profitable_estimate = 1;
3179 else
3181 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3182 - vec_inside_cost * peel_iters_prologue
3183 - vec_inside_cost * peel_iters_epilogue)
3184 / ((scalar_single_iter_cost * vf)
3185 - vec_inside_cost);
3187 min_profitable_estimate --;
3188 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_NOTE, vect_location,
3191 " Static estimate profitability threshold = %d\n",
3192 min_profitable_estimate);
3194 *ret_min_profitable_estimate = min_profitable_estimate;
3197 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3198 vector elements (not bits) for a vector of mode MODE. */
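/* For example (purely illustrative), for a 4-element vector mode and
   OFFSET 1 the selector is {1, 2, 3, 4}: each result element is taken from
   the next position, with index 4 referring to the first element of the
   second vec_perm input.  */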
3199 static void
3200 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3201 unsigned char *sel)
3203 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3205 for (i = 0; i < nelt; i++)
3206 sel[i] = (i + offset) & (2*nelt - 1);
3209 /* Checks whether the target supports whole-vector shifts for vectors of mode
3210 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3211 it supports vec_perm_const with masks for all necessary shift amounts. */
3212 static bool
3213 have_whole_vector_shift (enum machine_mode mode)
3215 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3216 return true;
3218 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3219 return false;
3221 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3222 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3224 for (i = nelt/2; i >= 1; i/=2)
3226 calc_vec_perm_mask_for_shift (mode, i, sel);
3227 if (!can_vec_perm_p (mode, false, sel))
3228 return false;
3230 return true;
3233 /* Return the reduction operand (with index REDUC_INDEX) of STMT. */
3235 static tree
3236 get_reduction_op (gimple stmt, int reduc_index)
3238 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3240 case GIMPLE_SINGLE_RHS:
3241 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3242 == ternary_op);
3243 return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3244 case GIMPLE_UNARY_RHS:
3245 return gimple_assign_rhs1 (stmt);
3246 case GIMPLE_BINARY_RHS:
3247 return (reduc_index
3248 ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3249 case GIMPLE_TERNARY_RHS:
3250 return gimple_op (stmt, reduc_index + 1);
3251 default:
3252 gcc_unreachable ();
3256 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3257 functions. Design better to avoid maintenance issues. */
3259 /* Function vect_model_reduction_cost.
3261 Models cost for a reduction operation, including the vector ops
3262 generated within the strip-mine loop, the initial definition before
3263 the loop, and the epilogue code that must be generated. */
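/* As an illustrative example: for a 4-element vector reduction with no
   REDUC_* tree code and no whole-vector shift available, the epilogue below
   is charged N + (N - 1) = 7 vector_stmt operations (4 extracts plus 3
   scalar reduction ops).  */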
3265 static bool
3266 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3267 int ncopies, int reduc_index)
3269 int prologue_cost = 0, epilogue_cost = 0;
3270 enum tree_code code;
3271 optab optab;
3272 tree vectype;
3273 gimple stmt, orig_stmt;
3274 tree reduction_op;
3275 machine_mode mode;
3276 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3277 struct loop *loop = NULL;
3278 void *target_cost_data;
3280 if (loop_vinfo)
3282 loop = LOOP_VINFO_LOOP (loop_vinfo);
3283 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3285 else
3286 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3288 /* Cost of reduction op inside loop. */
3289 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3290 stmt_info, 0, vect_body);
3291 stmt = STMT_VINFO_STMT (stmt_info);
3293 reduction_op = get_reduction_op (stmt, reduc_index);
3295 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3296 if (!vectype)
3298 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3301 "unsupported data-type ");
3302 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3303 TREE_TYPE (reduction_op));
3304 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3306 return false;
3309 mode = TYPE_MODE (vectype);
3310 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3312 if (!orig_stmt)
3313 orig_stmt = STMT_VINFO_STMT (stmt_info);
3315 code = gimple_assign_rhs_code (orig_stmt);
3317 /* Add in cost for initial definition. */
3318 prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3319 stmt_info, 0, vect_prologue);
3321 /* Determine cost of epilogue code.
3323 We have a reduction operator that will reduce the vector in one statement.
3324 Also requires scalar extract. */
3326 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3328 if (reduc_code != ERROR_MARK)
3330 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3331 stmt_info, 0, vect_epilogue);
3332 epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3333 stmt_info, 0, vect_epilogue);
3335 else
3337 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3338 tree bitsize =
3339 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3340 int element_bitsize = tree_to_uhwi (bitsize);
3341 int nelements = vec_size_in_bits / element_bitsize;
3343 optab = optab_for_tree_code (code, vectype, optab_default);
3345 /* We have a whole vector shift available. */
3346 if (VECTOR_MODE_P (mode)
3347 && optab_handler (optab, mode) != CODE_FOR_nothing
3348 && have_whole_vector_shift (mode))
3350 /* Final reduction via vector shifts and the reduction operator.
3351 Also requires scalar extract. */
3352 epilogue_cost += add_stmt_cost (target_cost_data,
3353 exact_log2 (nelements) * 2,
3354 vector_stmt, stmt_info, 0,
3355 vect_epilogue);
3356 epilogue_cost += add_stmt_cost (target_cost_data, 1,
3357 vec_to_scalar, stmt_info, 0,
3358 vect_epilogue);
3360 else
3361 /* Use extracts and reduction op for final reduction. For N
3362 elements, we have N extracts and N-1 reduction ops. */
3363 epilogue_cost += add_stmt_cost (target_cost_data,
3364 nelements + nelements - 1,
3365 vector_stmt, stmt_info, 0,
3366 vect_epilogue);
3370 if (dump_enabled_p ())
3371 dump_printf (MSG_NOTE,
3372 "vect_model_reduction_cost: inside_cost = %d, "
3373 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3374 prologue_cost, epilogue_cost);
3376 return true;
3380 /* Function vect_model_induction_cost.
3382 Models cost for induction operations. */
3384 static void
3385 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3388 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3389 unsigned inside_cost, prologue_cost;
3391 /* loop cost for vec_loop. */
3392 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3393 stmt_info, 0, vect_body);
3395 /* prologue cost for vec_init and vec_step. */
3396 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3397 stmt_info, 0, vect_prologue);
3399 if (dump_enabled_p ())
3400 dump_printf_loc (MSG_NOTE, vect_location,
3401 "vect_model_induction_cost: inside_cost = %d, "
3402 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3406 /* Function get_initial_def_for_induction
3408 Input:
3409 STMT - a stmt that performs an induction operation in the loop.
3410 IV_PHI - the initial value of the induction variable
3412 Output:
3413 Return a vector variable, initialized with the first VF values of
3414 the induction variable. E.g., for an iv with IV_PHI='X' and
3415 evolution S, for a vector of 4 units, we want to return:
3416 [X, X + S, X + 2*S, X + 3*S]. */
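/* For instance (an illustrative case), an induction variable with initial
   value 0, step 1 and VF = 4 gets the initial vector {0, 1, 2, 3}; the step
   vector used to advance it by a full vector iteration is then {4, 4, 4, 4}.  */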
3418 static tree
3419 get_initial_def_for_induction (gimple iv_phi)
3421 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3423 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3424 tree vectype;
3425 int nunits;
3426 edge pe = loop_preheader_edge (loop);
3427 struct loop *iv_loop;
3428 basic_block new_bb;
3429 tree new_vec, vec_init, vec_step, t;
3430 tree new_var;
3431 tree new_name;
3432 gimple init_stmt, new_stmt;
3433 gphi *induction_phi;
3434 tree induc_def, vec_def, vec_dest;
3435 tree init_expr, step_expr;
3436 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3437 int i;
3438 int ncopies;
3439 tree expr;
3440 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3441 bool nested_in_vect_loop = false;
3442 gimple_seq stmts = NULL;
3443 imm_use_iterator imm_iter;
3444 use_operand_p use_p;
3445 gimple exit_phi;
3446 edge latch_e;
3447 tree loop_arg;
3448 gimple_stmt_iterator si;
3449 basic_block bb = gimple_bb (iv_phi);
3450 tree stepvectype;
3451 tree resvectype;
3453 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
3454 if (nested_in_vect_loop_p (loop, iv_phi))
3456 nested_in_vect_loop = true;
3457 iv_loop = loop->inner;
3459 else
3460 iv_loop = loop;
3461 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3463 latch_e = loop_latch_edge (iv_loop);
3464 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3466 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3467 gcc_assert (step_expr != NULL_TREE);
3469 pe = loop_preheader_edge (iv_loop);
3470 init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3471 loop_preheader_edge (iv_loop));
3473 vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3474 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3475 gcc_assert (vectype);
3476 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3477 ncopies = vf / nunits;
3479 gcc_assert (phi_info);
3480 gcc_assert (ncopies >= 1);
3482 /* Convert the step to the desired type. */
3483 step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3484 step_expr),
3485 &stmts, true, NULL_TREE);
3486 if (stmts)
3488 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3489 gcc_assert (!new_bb);
3492 /* Find the first insertion point in the BB. */
3493 si = gsi_after_labels (bb);
3495 /* Create the vector that holds the initial_value of the induction. */
3496 if (nested_in_vect_loop)
3498 /* iv_loop is nested in the loop to be vectorized. init_expr had already
3499 been created during vectorization of previous stmts. We obtain it
3500 from the STMT_VINFO_VEC_STMT of the defining stmt. */
3501 vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
3502 /* If the initial value is not of proper type, convert it. */
3503 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3505 new_stmt
3506 = gimple_build_assign (vect_get_new_vect_var (vectype,
3507 vect_simple_var,
3508 "vec_iv_"),
3509 VIEW_CONVERT_EXPR,
3510 build1 (VIEW_CONVERT_EXPR, vectype,
3511 vec_init));
3512 vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3513 gimple_assign_set_lhs (new_stmt, vec_init);
3514 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3515 new_stmt);
3516 gcc_assert (!new_bb);
3517 set_vinfo_for_stmt (new_stmt,
3518 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3521 else
3523 vec<constructor_elt, va_gc> *v;
3525 /* iv_loop is the loop to be vectorized. Create:
3526 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
3527 new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3528 vect_scalar_var, "var_");
3529 new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3530 init_expr),
3531 &stmts, false, new_var);
3532 if (stmts)
3534 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3535 gcc_assert (!new_bb);
3538 vec_alloc (v, nunits);
3539 bool constant_p = is_gimple_min_invariant (new_name);
3540 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3541 for (i = 1; i < nunits; i++)
3543 /* Create: new_name_i = new_name + step_expr */
3544 new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3545 new_name, step_expr);
3546 if (!is_gimple_min_invariant (new_name))
3548 init_stmt = gimple_build_assign (new_var, new_name);
3549 new_name = make_ssa_name (new_var, init_stmt);
3550 gimple_assign_set_lhs (init_stmt, new_name);
3551 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3552 gcc_assert (!new_bb);
3553 if (dump_enabled_p ())
3555 dump_printf_loc (MSG_NOTE, vect_location,
3556 "created new init_stmt: ");
3557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3558 dump_printf (MSG_NOTE, "\n");
3560 constant_p = false;
3562 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3564 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
3565 if (constant_p)
3566 new_vec = build_vector_from_ctor (vectype, v);
3567 else
3568 new_vec = build_constructor (vectype, v);
3569 vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3573 /* Create the vector that holds the step of the induction. */
3574 if (nested_in_vect_loop)
3575 /* iv_loop is nested in the loop to be vectorized. Generate:
3576 vec_step = [S, S, S, S] */
3577 new_name = step_expr;
3578 else
3580 /* iv_loop is the loop to be vectorized. Generate:
3581 vec_step = [VF*S, VF*S, VF*S, VF*S] */
3582 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3584 expr = build_int_cst (integer_type_node, vf);
3585 expr = fold_convert (TREE_TYPE (step_expr), expr);
3587 else
3588 expr = build_int_cst (TREE_TYPE (step_expr), vf);
3589 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3590 expr, step_expr);
3591 if (TREE_CODE (step_expr) == SSA_NAME)
3592 new_name = vect_init_vector (iv_phi, new_name,
3593 TREE_TYPE (step_expr), NULL);
3596 t = unshare_expr (new_name);
3597 gcc_assert (CONSTANT_CLASS_P (new_name)
3598 || TREE_CODE (new_name) == SSA_NAME);
3599 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3600 gcc_assert (stepvectype);
3601 new_vec = build_vector_from_val (stepvectype, t);
3602 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3605 /* Create the following def-use cycle:
3606 loop prolog:
3607 vec_init = ...
3608 vec_step = ...
3609 loop:
3610 vec_iv = PHI <vec_init, vec_loop>
3612 STMT
3614 vec_loop = vec_iv + vec_step; */
3616 /* Create the induction-phi that defines the induction-operand. */
3617 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3618 induction_phi = create_phi_node (vec_dest, iv_loop->header);
3619 set_vinfo_for_stmt (induction_phi,
3620 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3621 induc_def = PHI_RESULT (induction_phi);
3623 /* Create the iv update inside the loop */
3624 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3625 vec_def = make_ssa_name (vec_dest, new_stmt);
3626 gimple_assign_set_lhs (new_stmt, vec_def);
3627 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3628 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3629 NULL));
3631 /* Set the arguments of the phi node: */
3632 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3633 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3634 UNKNOWN_LOCATION);
3637 /* In case the vectorization factor (VF) is bigger than the number
3638 of elements that we can fit in a vectype (nunits), we have to generate
3639 more than one vector stmt - i.e., we need to "unroll" the
3640 vector stmt by a factor of VF/nunits. For more details see the documentation
3641 in vectorizable_operation. */
3643 if (ncopies > 1)
3645 stmt_vec_info prev_stmt_vinfo;
3646 /* FORNOW. This restriction should be relaxed. */
3647 gcc_assert (!nested_in_vect_loop);
3649 /* Create the vector that holds the step of the induction. */
3650 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3652 expr = build_int_cst (integer_type_node, nunits);
3653 expr = fold_convert (TREE_TYPE (step_expr), expr);
3655 else
3656 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3657 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3658 expr, step_expr);
3659 if (TREE_CODE (step_expr) == SSA_NAME)
3660 new_name = vect_init_vector (iv_phi, new_name,
3661 TREE_TYPE (step_expr), NULL);
3662 t = unshare_expr (new_name);
3663 gcc_assert (CONSTANT_CLASS_P (new_name)
3664 || TREE_CODE (new_name) == SSA_NAME);
3665 new_vec = build_vector_from_val (stepvectype, t);
3666 vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3668 vec_def = induc_def;
3669 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3670 for (i = 1; i < ncopies; i++)
3672 /* vec_i = vec_prev + vec_step */
3673 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3674 vec_def, vec_step);
3675 vec_def = make_ssa_name (vec_dest, new_stmt);
3676 gimple_assign_set_lhs (new_stmt, vec_def);
3678 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3679 if (!useless_type_conversion_p (resvectype, vectype))
3681 new_stmt
3682 = gimple_build_assign
3683 (vect_get_new_vect_var (resvectype, vect_simple_var,
3684 "vec_iv_"),
3685 VIEW_CONVERT_EXPR,
3686 build1 (VIEW_CONVERT_EXPR, resvectype,
3687 gimple_assign_lhs (new_stmt)));
3688 gimple_assign_set_lhs (new_stmt,
3689 make_ssa_name
3690 (gimple_assign_lhs (new_stmt), new_stmt));
3691 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3693 set_vinfo_for_stmt (new_stmt,
3694 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3695 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3696 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3700 if (nested_in_vect_loop)
3702 /* Find the loop-closed exit-phi of the induction, and record
3703 the final vector of induction results: */
3704 exit_phi = NULL;
3705 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3707 gimple use_stmt = USE_STMT (use_p);
3708 if (is_gimple_debug (use_stmt))
3709 continue;
3711 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3713 exit_phi = use_stmt;
3714 break;
3717 if (exit_phi)
3719 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3720 /* FORNOW. Currently not supporting the case that an inner-loop induction
3721 is not used in the outer-loop (i.e. only outside the outer-loop). */
3722 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3723 && !STMT_VINFO_LIVE_P (stmt_vinfo));
3725 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3726 if (dump_enabled_p ())
3728 dump_printf_loc (MSG_NOTE, vect_location,
3729 "vector of inductions after inner-loop:");
3730 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3731 dump_printf (MSG_NOTE, "\n");
3737 if (dump_enabled_p ())
3739 dump_printf_loc (MSG_NOTE, vect_location,
3740 "transform induction: created def-use cycle: ");
3741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3742 dump_printf (MSG_NOTE, "\n");
3743 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3744 SSA_NAME_DEF_STMT (vec_def), 0);
3745 dump_printf (MSG_NOTE, "\n");
3748 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3749 if (!useless_type_conversion_p (resvectype, vectype))
3751 new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3752 vect_simple_var,
3753 "vec_iv_"),
3754 VIEW_CONVERT_EXPR,
3755 build1 (VIEW_CONVERT_EXPR, resvectype,
3756 induc_def));
3757 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3758 gimple_assign_set_lhs (new_stmt, induc_def);
3759 si = gsi_after_labels (bb);
3760 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3761 set_vinfo_for_stmt (new_stmt,
3762 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3763 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3764 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3767 return induc_def;
3771 /* Function get_initial_def_for_reduction
3773 Input:
3774 STMT - a stmt that performs a reduction operation in the loop.
3775 INIT_VAL - the initial value of the reduction variable
3777 Output:
3778 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3779 of the reduction (used for adjusting the epilog - see below).
3780 Return a vector variable, initialized according to the operation that STMT
3781 performs. This vector will be used as the initial value of the
3782 vector of partial results.
3784 Option1 (adjust in epilog): Initialize the vector as follows:
3785 add/bit or/xor: [0,0,...,0,0]
3786 mult/bit and: [1,1,...,1,1]
3787 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3788 and when necessary (e.g. add/mult case) let the caller know
3789 that it needs to adjust the result by init_val.
3791 Option2: Initialize the vector as follows:
3792 add/bit or/xor: [init_val,0,0,...,0]
3793 mult/bit and: [init_val,1,1,...,1]
3794 min/max/cond_expr: [init_val,init_val,...,init_val]
3795 and no adjustments are needed.
3797 For example, for the following code:
3799 s = init_val;
3800 for (i=0;i<n;i++)
3801 s = s + a[i];
3803 STMT is 's = s + a[i]', and the reduction variable is 's'.
3804    For a vector of 4 units, we want to return either [init_val,0,0,0]
3805    (Option2), or [0,0,0,0] (Option1) and let the caller know that it
3806    needs to adjust the result at the end by 'init_val'.
3808    FORNOW, we use the 'adjust in epilog' scheme (Option1) when ADJUSTMENT_DEF
3809    is not NULL, because its initialization vector is simpler (the same element
3810    in all entries), and Option2 otherwise.
3812 A cost model should help decide between these two schemes. */
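/* Illustrative example (values chosen only for exposition): for the loop
   above with init_val = 5, a PLUS reduction and 4 elements per vector,
   Option1 returns [0,0,0,0] and reports ADJUSTMENT_DEF = 5, while Option2
   returns [5,0,0,0] and needs no adjustment; in both cases the elements of
   the returned vector seed the vector of partial sums.  */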
3814 tree
3815 get_initial_def_for_reduction (gimple stmt, tree init_val,
3816 tree *adjustment_def)
3818 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3819 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3820 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3821 tree scalar_type = TREE_TYPE (init_val);
3822 tree vectype = get_vectype_for_scalar_type (scalar_type);
3823 int nunits;
3824 enum tree_code code = gimple_assign_rhs_code (stmt);
3825 tree def_for_init;
3826 tree init_def;
3827 tree *elts;
3828 int i;
3829 bool nested_in_vect_loop = false;
3830 tree init_value;
3831 REAL_VALUE_TYPE real_init_val = dconst0;
3832 int int_init_val = 0;
3833 gimple def_stmt = NULL;
3835 gcc_assert (vectype);
3836 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3838 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3839 || SCALAR_FLOAT_TYPE_P (scalar_type));
3841 if (nested_in_vect_loop_p (loop, stmt))
3842 nested_in_vect_loop = true;
3843 else
3844 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3846 /* In case of double reduction we only create a vector variable to be put
3847 in the reduction phi node. The actual statement creation is done in
3848 vect_create_epilog_for_reduction. */
3849 if (adjustment_def && nested_in_vect_loop
3850 && TREE_CODE (init_val) == SSA_NAME
3851 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3852 && gimple_code (def_stmt) == GIMPLE_PHI
3853 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3854 && vinfo_for_stmt (def_stmt)
3855 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3856 == vect_double_reduction_def)
3858 *adjustment_def = NULL;
3859 return vect_create_destination_var (init_val, vectype);
3862 if (TREE_CONSTANT (init_val))
3864 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3865 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3866 else
3867 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3869 else
3870 init_value = init_val;
3872 switch (code)
3874 case WIDEN_SUM_EXPR:
3875 case DOT_PROD_EXPR:
3876 case SAD_EXPR:
3877 case PLUS_EXPR:
3878 case MINUS_EXPR:
3879 case BIT_IOR_EXPR:
3880 case BIT_XOR_EXPR:
3881 case MULT_EXPR:
3882 case BIT_AND_EXPR:
3883       /* ADJUSTMENT_DEF is NULL when called from
3884 vect_create_epilog_for_reduction to vectorize double reduction. */
3885 if (adjustment_def)
3887 if (nested_in_vect_loop)
3888 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3889 NULL);
3890 else
3891 *adjustment_def = init_val;
3894 if (code == MULT_EXPR)
3896 real_init_val = dconst1;
3897 int_init_val = 1;
3900 if (code == BIT_AND_EXPR)
3901 int_init_val = -1;
3903 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3904 def_for_init = build_real (scalar_type, real_init_val);
3905 else
3906 def_for_init = build_int_cst (scalar_type, int_init_val);
3908 /* Create a vector of '0' or '1' except the first element. */
3909 elts = XALLOCAVEC (tree, nunits);
3910 for (i = nunits - 2; i >= 0; --i)
3911 elts[i + 1] = def_for_init;
3913 /* Option1: the first element is '0' or '1' as well. */
3914 if (adjustment_def)
3916 elts[0] = def_for_init;
3917 init_def = build_vector (vectype, elts);
3918 break;
3921 /* Option2: the first element is INIT_VAL. */
3922 elts[0] = init_val;
3923 if (TREE_CONSTANT (init_val))
3924 init_def = build_vector (vectype, elts);
3925 else
3927 vec<constructor_elt, va_gc> *v;
3928 vec_alloc (v, nunits);
3929 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3930 for (i = 1; i < nunits; ++i)
3931 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3932 init_def = build_constructor (vectype, v);
3935 break;
3937 case MIN_EXPR:
3938 case MAX_EXPR:
3939 case COND_EXPR:
3940 if (adjustment_def)
3942 *adjustment_def = NULL_TREE;
3943 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3944 break;
3947 init_def = build_vector_from_val (vectype, init_value);
3948 break;
3950 default:
3951 gcc_unreachable ();
3954 return init_def;
3957 /* Function vect_create_epilog_for_reduction
3959 Create code at the loop-epilog to finalize the result of a reduction
3960 computation.
3962 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3963 reduction statements.
3964 STMT is the scalar reduction stmt that is being vectorized.
3965 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3966 number of elements that we can fit in a vectype (nunits). In this case
3967 we have to generate more than one vector stmt - i.e - we need to "unroll"
3968 the vector stmt by a factor VF/nunits. For more details see documentation
3969 in vectorizable_operation.
3970 REDUC_CODE is the tree-code for the epilog reduction.
3971 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3972 computation.
3973 REDUC_INDEX is the index of the operand in the right hand side of the
3974 statement that is defined by REDUCTION_PHI.
3975 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3976 SLP_NODE is an SLP node containing a group of reduction statements. The
3977 first one in this group is STMT.
3979 This function:
3980 1. Creates the reduction def-use cycles: sets the arguments for
3981 REDUCTION_PHIS:
3982 The loop-entry argument is the vectorized initial-value of the reduction.
3983 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3984 sums.
3985 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3986 by applying the operation specified by REDUC_CODE if available, or by
3987 other means (whole-vector shifts or a scalar loop).
3988 The function also creates a new phi node at the loop exit to preserve
3989 loop-closed form, as illustrated below.
3991 The flow at the entry to this function:
3993 loop:
3994 vec_def = phi <null, null> # REDUCTION_PHI
3995 VECT_DEF = vector_stmt # vectorized form of STMT
3996 s_loop = scalar_stmt # (scalar) STMT
3997 loop_exit:
3998 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3999 use <s_out0>
4000 use <s_out0>
4002 The above is transformed by this function into:
4004 loop:
4005 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4006 VECT_DEF = vector_stmt # vectorized form of STMT
4007 s_loop = scalar_stmt # (scalar) STMT
4008 loop_exit:
4009 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4010 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4011 v_out2 = reduce <v_out1>
4012 s_out3 = extract_field <v_out2, 0>
4013 s_out4 = adjust_result <s_out3>
4014 use <s_out4>
4015 use <s_out4>
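   An informal reading of the diagram above: for a plain sum reduction,
   v_out2 = reduce <v_out1> combines the vector of partial sums into a
   single value, s_out3 = extract_field picks that value out of the vector,
   and s_out4 = adjust_result adds back the initial value when the
   'adjust in epilog' initialization scheme was used (see
   get_initial_def_for_reduction above).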
4018 static void
4019 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
4020 int ncopies, enum tree_code reduc_code,
4021 vec<gimple> reduction_phis,
4022 int reduc_index, bool double_reduc,
4023 slp_tree slp_node)
4025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4026 stmt_vec_info prev_phi_info;
4027 tree vectype;
4028 machine_mode mode;
4029 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4031 basic_block exit_bb;
4032 tree scalar_dest;
4033 tree scalar_type;
4034 gimple new_phi = NULL, phi;
4035 gimple_stmt_iterator exit_gsi;
4036 tree vec_dest;
4037 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4038 gimple epilog_stmt = NULL;
4039 enum tree_code code = gimple_assign_rhs_code (stmt);
4040 gimple exit_phi;
4041 tree bitsize;
4042 tree adjustment_def = NULL;
4043 tree vec_initial_def = NULL;
4044 tree reduction_op, expr, def;
4045 tree orig_name, scalar_result;
4046 imm_use_iterator imm_iter, phi_imm_iter;
4047 use_operand_p use_p, phi_use_p;
4048 gimple use_stmt, orig_stmt, reduction_phi = NULL;
4049 bool nested_in_vect_loop = false;
4050 auto_vec<gimple> new_phis;
4051 auto_vec<gimple> inner_phis;
4052 enum vect_def_type dt = vect_unknown_def_type;
4053 int j, i;
4054 auto_vec<tree> scalar_results;
4055 unsigned int group_size = 1, k, ratio;
4056 auto_vec<tree> vec_initial_defs;
4057 auto_vec<gimple> phis;
4058 bool slp_reduc = false;
4059 tree new_phi_result;
4060 gimple inner_phi = NULL;
4062 if (slp_node)
4063 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4065 if (nested_in_vect_loop_p (loop, stmt))
4067 outer_loop = loop;
4068 loop = loop->inner;
4069 nested_in_vect_loop = true;
4070 gcc_assert (!slp_node);
4073 reduction_op = get_reduction_op (stmt, reduc_index);
4075 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4076 gcc_assert (vectype);
4077 mode = TYPE_MODE (vectype);
4079 /* 1. Create the reduction def-use cycle:
4080 Set the arguments of REDUCTION_PHIS, i.e., transform
4082 loop:
4083 vec_def = phi <null, null> # REDUCTION_PHI
4084 VECT_DEF = vector_stmt # vectorized form of STMT
4087 into:
4089 loop:
4090 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4091 VECT_DEF = vector_stmt # vectorized form of STMT
4094 (in case of SLP, do it for all the phis). */
4096 /* Get the loop-entry arguments. */
4097 if (slp_node)
4098 vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4099 NULL, slp_node, reduc_index);
4100 else
4102 vec_initial_defs.create (1);
4103 /* For the case of reduction, vect_get_vec_def_for_operand returns
4104 the scalar def before the loop, that defines the initial value
4105 of the reduction variable. */
4106 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
4107 &adjustment_def);
4108 vec_initial_defs.quick_push (vec_initial_def);
4111 /* Set phi nodes arguments. */
4112 FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4114 tree vec_init_def, def;
4115 gimple_seq stmts;
4116 vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4117 true, NULL_TREE);
4118 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4119 def = vect_defs[i];
4120 for (j = 0; j < ncopies; j++)
4122 /* Set the loop-entry arg of the reduction-phi. */
4123 add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4124 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4126 /* Set the loop-latch arg for the reduction-phi. */
4127 if (j > 0)
4128 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4130 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4131 UNKNOWN_LOCATION);
4133 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_NOTE, vect_location,
4136 "transform reduction: created def-use cycle: ");
4137 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4138 dump_printf (MSG_NOTE, "\n");
4139 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4140 dump_printf (MSG_NOTE, "\n");
4143 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4147 /* 2. Create epilog code.
4148 The reduction epilog code operates across the elements of the vector
4149 of partial results computed by the vectorized loop.
4150 The reduction epilog code consists of:
4152 step 1: compute the scalar result in a vector (v_out2)
4153 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4154 step 3: adjust the scalar result (s_out3) if needed.
4156           Step 1 can be accomplished using one of the following three schemes:
4157 (scheme 1) using reduc_code, if available.
4158 (scheme 2) using whole-vector shifts, if available.
4159 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4160 combined.
4162 The overall epilog code looks like this:
4164 s_out0 = phi <s_loop> # original EXIT_PHI
4165 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4166 v_out2 = reduce <v_out1> # step 1
4167 s_out3 = extract_field <v_out2, 0> # step 2
4168 s_out4 = adjust_result <s_out3> # step 3
4170 (step 3 is optional, and steps 1 and 2 may be combined).
4171 Lastly, the uses of s_out0 are replaced by s_out4. */
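   /* Informal summary of how a scheme is chosen below (a reading of the
      code, not a guarantee): scheme 1 is used when REDUC_CODE is supported
      by the target and this is not an SLP reduction, scheme 2 when the
      target has whole-vector shifts for this mode, and scheme 3 otherwise;
      SLP reductions always extract all the elements with scalar code, as
      noted in 2.3 below.  */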
4174 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4175 v_out1 = phi <VECT_DEF>
4176 Store them in NEW_PHIS. */
4178 exit_bb = single_exit (loop)->dest;
4179 prev_phi_info = NULL;
4180 new_phis.create (vect_defs.length ());
4181 FOR_EACH_VEC_ELT (vect_defs, i, def)
4183 for (j = 0; j < ncopies; j++)
4185 tree new_def = copy_ssa_name (def);
4186 phi = create_phi_node (new_def, exit_bb);
4187 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
4188 if (j == 0)
4189 new_phis.quick_push (phi);
4190 else
4192 def = vect_get_vec_def_for_stmt_copy (dt, def);
4193 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4196 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4197 prev_phi_info = vinfo_for_stmt (phi);
4201 /* The epilogue is created for the outer-loop, i.e., for the loop being
4202 vectorized. Create exit phis for the outer loop. */
4203 if (double_reduc)
4205 loop = outer_loop;
4206 exit_bb = single_exit (loop)->dest;
4207 inner_phis.create (vect_defs.length ());
4208 FOR_EACH_VEC_ELT (new_phis, i, phi)
4210 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4211 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4212 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4213 PHI_RESULT (phi));
4214 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4215 loop_vinfo, NULL));
4216 inner_phis.quick_push (phi);
4217 new_phis[i] = outer_phi;
4218 prev_phi_info = vinfo_for_stmt (outer_phi);
4219 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4221 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4222 new_result = copy_ssa_name (PHI_RESULT (phi));
4223 outer_phi = create_phi_node (new_result, exit_bb);
4224 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4225 PHI_RESULT (phi));
4226 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4227 loop_vinfo, NULL));
4228 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4229 prev_phi_info = vinfo_for_stmt (outer_phi);
4234 exit_gsi = gsi_after_labels (exit_bb);
4236 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4237 (i.e. when reduc_code is not available) and in the final adjustment
4238 code (if needed). Also get the original scalar reduction variable as
4239 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4240 represents a reduction pattern), the tree-code and scalar-def are
4241 taken from the original stmt that the pattern-stmt (STMT) replaces.
4242 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4243 are taken from STMT. */
4245 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4246 if (!orig_stmt)
4248 /* Regular reduction */
4249 orig_stmt = stmt;
4251 else
4253 /* Reduction pattern */
4254 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4255 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4256 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4259 code = gimple_assign_rhs_code (orig_stmt);
4260 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4261 partial results are added and not subtracted. */
4262 if (code == MINUS_EXPR)
4263 code = PLUS_EXPR;
4265 scalar_dest = gimple_assign_lhs (orig_stmt);
4266 scalar_type = TREE_TYPE (scalar_dest);
4267 scalar_results.create (group_size);
4268 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4269 bitsize = TYPE_SIZE (scalar_type);
4271 /* In case this is a reduction in an inner-loop while vectorizing an outer
4272 loop - we don't need to extract a single scalar result at the end of the
4273 inner-loop (unless it is double reduction, i.e., the use of reduction is
4274 outside the outer-loop). The final vector of partial results will be used
4275 in the vectorized outer-loop, or reduced to a scalar result at the end of
4276 the outer-loop. */
4277 if (nested_in_vect_loop && !double_reduc)
4278 goto vect_finalize_reduction;
4280 /* SLP reduction without reduction chain, e.g.,
4281 # a1 = phi <a2, a0>
4282 # b1 = phi <b2, b0>
4283 a2 = operation (a1)
4284 b2 = operation (b1) */
4285 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4287 /* In case of reduction chain, e.g.,
4288 # a1 = phi <a3, a0>
4289 a2 = operation (a1)
4290 a3 = operation (a2),
4292 we may end up with more than one vector result. Here we reduce them to
4293 one vector. */
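  /* For instance (hypothetical shapes): if the chain produced two vector
     partial results v0 and v1, the code below emits v0' = v0 OP v1 and the
     epilog then reduces the single vector v0'.  */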
4294 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4296 tree first_vect = PHI_RESULT (new_phis[0]);
4297 tree tmp;
4298 gassign *new_vec_stmt = NULL;
4300 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4301 for (k = 1; k < new_phis.length (); k++)
4303 gimple next_phi = new_phis[k];
4304 tree second_vect = PHI_RESULT (next_phi);
4306 tmp = build2 (code, vectype, first_vect, second_vect);
4307 new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4308 first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4309 gimple_assign_set_lhs (new_vec_stmt, first_vect);
4310 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4313 new_phi_result = first_vect;
4314 if (new_vec_stmt)
4316 new_phis.truncate (0);
4317 new_phis.safe_push (new_vec_stmt);
4320 else
4321 new_phi_result = PHI_RESULT (new_phis[0]);
4323 /* 2.3 Create the reduction code, using one of the three schemes described
4324 above. In SLP we simply need to extract all the elements from the
4325 vector (without reducing them), so we use scalar shifts. */
4326 if (reduc_code != ERROR_MARK && !slp_reduc)
4328 tree tmp;
4329 tree vec_elem_type;
4331 /*** Case 1: Create:
4332 v_out2 = reduc_expr <v_out1> */
4334 if (dump_enabled_p ())
4335 dump_printf_loc (MSG_NOTE, vect_location,
4336 "Reduce using direct vector reduction.\n");
4338 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4339 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4341 tree tmp_dest =
4342 vect_create_destination_var (scalar_dest, vec_elem_type);
4343 tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4344 epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4345 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4346 gimple_assign_set_lhs (epilog_stmt, new_temp);
4347 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4349 tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4351 else
4352 tmp = build1 (reduc_code, scalar_type, new_phi_result);
4353 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4354 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4355 gimple_assign_set_lhs (epilog_stmt, new_temp);
4356 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4357 scalar_results.safe_push (new_temp);
4359 else
4361 bool reduce_with_shift = have_whole_vector_shift (mode);
4362 int element_bitsize = tree_to_uhwi (bitsize);
4363 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4364 tree vec_temp;
4366 /* Regardless of whether we have a whole vector shift, if we're
4367 emulating the operation via tree-vect-generic, we don't want
4368 to use it. Only the first round of the reduction is likely
4369 to still be profitable via emulation. */
4370 /* ??? It might be better to emit a reduction tree code here, so that
4371 tree-vect-generic can expand the first round via bit tricks. */
4372 if (!VECTOR_MODE_P (mode))
4373 reduce_with_shift = false;
4374 else
4376 optab optab = optab_for_tree_code (code, vectype, optab_default);
4377 if (optab_handler (optab, mode) == CODE_FOR_nothing)
4378 reduce_with_shift = false;
4381 if (reduce_with_shift && !slp_reduc)
4383 int nelements = vec_size_in_bits / element_bitsize;
4384 unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4386 int elt_offset;
4388 tree zero_vec = build_zero_cst (vectype);
4389 /*** Case 2: Create:
4390 for (offset = nelements/2; offset >= 1; offset/=2)
4392 Create: va' = vec_shift <va, offset>
4393 Create: va = vop <va, va'>
4394 } */
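          /* Example with made-up lane values: for a V4SI sum whose lanes are
             {a, b, c, d}, the first round shifts by 2 elements and adds,
             giving {a+c, b+d, ...}; the second shifts by 1 and adds, giving
             {a+b+c+d, ...}, so the element at bit offset 0 holds the final
             sum that the BIT_FIELD_REF below extracts.  */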
4396 tree rhs;
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE, vect_location,
4400 "Reduce using vector shifts\n");
4402 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4403 new_temp = new_phi_result;
4404 for (elt_offset = nelements / 2;
4405 elt_offset >= 1;
4406 elt_offset /= 2)
4408 calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4409 tree mask = vect_gen_perm_mask_any (vectype, sel);
4410 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4411 new_temp, zero_vec, mask);
4412 new_name = make_ssa_name (vec_dest, epilog_stmt);
4413 gimple_assign_set_lhs (epilog_stmt, new_name);
4414 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4416 epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4417 new_temp);
4418 new_temp = make_ssa_name (vec_dest, epilog_stmt);
4419 gimple_assign_set_lhs (epilog_stmt, new_temp);
4420 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4423 /* 2.4 Extract the final scalar result. Create:
4424 s_out3 = extract_field <v_out2, bitpos> */
4426 if (dump_enabled_p ())
4427 dump_printf_loc (MSG_NOTE, vect_location,
4428 "extract scalar result\n");
4430 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4431 bitsize, bitsize_zero_node);
4432 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4433 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4434 gimple_assign_set_lhs (epilog_stmt, new_temp);
4435 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4436 scalar_results.safe_push (new_temp);
4438 else
4440 /*** Case 3: Create:
4441 s = extract_field <v_out2, 0>
4442 for (offset = element_size;
4443 offset < vector_size;
4444 offset += element_size;)
4446 Create: s' = extract_field <v_out2, offset>
4447 Create: s = op <s, s'> // For non SLP cases
4448 } */
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_NOTE, vect_location,
4452 "Reduce using scalar code.\n");
4454 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4455 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4457 int bit_offset;
4458 if (gimple_code (new_phi) == GIMPLE_PHI)
4459 vec_temp = PHI_RESULT (new_phi);
4460 else
4461 vec_temp = gimple_assign_lhs (new_phi);
4462 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4463 bitsize_zero_node);
4464 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4465 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4466 gimple_assign_set_lhs (epilog_stmt, new_temp);
4467 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4469 /* In SLP we don't need to apply reduction operation, so we just
4470 collect s' values in SCALAR_RESULTS. */
4471 if (slp_reduc)
4472 scalar_results.safe_push (new_temp);
4474 for (bit_offset = element_bitsize;
4475 bit_offset < vec_size_in_bits;
4476 bit_offset += element_bitsize)
4478 tree bitpos = bitsize_int (bit_offset);
4479 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4480 bitsize, bitpos);
4482 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4483 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4484 gimple_assign_set_lhs (epilog_stmt, new_name);
4485 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4487 if (slp_reduc)
4489 /* In SLP we don't need to apply reduction operation, so
4490 we just collect s' values in SCALAR_RESULTS. */
4491 new_temp = new_name;
4492 scalar_results.safe_push (new_name);
4494 else
4496 epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4497 new_name, new_temp);
4498 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4499 gimple_assign_set_lhs (epilog_stmt, new_temp);
4500 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4505           /* The only case in which we need to reduce scalar results in SLP is
4506              unrolling.  If the size of SCALAR_RESULTS is greater than
4507              GROUP_SIZE, we reduce them by combining elements modulo
4508              GROUP_SIZE.  */
4509 if (slp_reduc)
4511 tree res, first_res, new_res;
4512 gimple new_stmt;
4514 /* Reduce multiple scalar results in case of SLP unrolling. */
4515 for (j = group_size; scalar_results.iterate (j, &res);
4516 j++)
4518 first_res = scalar_results[j % group_size];
4519 new_stmt = gimple_build_assign (new_scalar_dest, code,
4520 first_res, res);
4521 new_res = make_ssa_name (new_scalar_dest, new_stmt);
4522 gimple_assign_set_lhs (new_stmt, new_res);
4523 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4524 scalar_results[j % group_size] = new_res;
4527 else
4528 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
4529 scalar_results.safe_push (new_temp);
4533 vect_finalize_reduction:
4535 if (double_reduc)
4536 loop = loop->inner;
4538 /* 2.5 Adjust the final result by the initial value of the reduction
4539 variable. (When such adjustment is not needed, then
4540 'adjustment_def' is zero). For example, if code is PLUS we create:
4541 new_temp = loop_exit_def + adjustment_def */
4543 if (adjustment_def)
4545 gcc_assert (!slp_reduc);
4546 if (nested_in_vect_loop)
4548 new_phi = new_phis[0];
4549 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4550 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4551 new_dest = vect_create_destination_var (scalar_dest, vectype);
4553 else
4555 new_temp = scalar_results[0];
4556 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4557 expr = build2 (code, scalar_type, new_temp, adjustment_def);
4558 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4561 epilog_stmt = gimple_build_assign (new_dest, expr);
4562 new_temp = make_ssa_name (new_dest, epilog_stmt);
4563 gimple_assign_set_lhs (epilog_stmt, new_temp);
4564 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4565 if (nested_in_vect_loop)
4567 set_vinfo_for_stmt (epilog_stmt,
4568 new_stmt_vec_info (epilog_stmt, loop_vinfo,
4569 NULL));
4570 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4571 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4573 if (!double_reduc)
4574 scalar_results.quick_push (new_temp);
4575 else
4576 scalar_results[0] = new_temp;
4578 else
4579 scalar_results[0] = new_temp;
4581 new_phis[0] = epilog_stmt;
4584 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
4585 phis with new adjusted scalar results, i.e., replace use <s_out0>
4586 with use <s_out4>.
4588 Transform:
4589 loop_exit:
4590 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4591 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4592 v_out2 = reduce <v_out1>
4593 s_out3 = extract_field <v_out2, 0>
4594 s_out4 = adjust_result <s_out3>
4595 use <s_out0>
4596 use <s_out0>
4598 into:
4600 loop_exit:
4601 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4602 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4603 v_out2 = reduce <v_out1>
4604 s_out3 = extract_field <v_out2, 0>
4605 s_out4 = adjust_result <s_out3>
4606 use <s_out4>
4607 use <s_out4> */
4610   /* In an SLP reduction chain we reduce vector results into one vector if
4611      necessary, hence we set GROUP_SIZE to 1 here.  SCALAR_DEST is the LHS of
4612 the last stmt in the reduction chain, since we are looking for the loop
4613 exit phi node. */
4614 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4616 gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4617 /* Handle reduction patterns. */
4618 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4619 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4621 scalar_dest = gimple_assign_lhs (dest_stmt);
4622 group_size = 1;
4625 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4626      case GROUP_SIZE is greater than the vectorization factor).  Therefore, we
4627 need to match SCALAR_RESULTS with corresponding statements. The first
4628 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4629 the first vector stmt, etc.
4630 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
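  /* A made-up example of this mapping: with GROUP_SIZE = 4 and two vector
     stmts in NEW_PHIS, RATIO = 2, so scalar results 0 and 1 correspond to
     the first vector stmt / reduction phi and results 2 and 3 to the
     second.  */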
4631 if (group_size > new_phis.length ())
4633 ratio = group_size / new_phis.length ();
4634 gcc_assert (!(group_size % new_phis.length ()));
4636 else
4637 ratio = 1;
4639 for (k = 0; k < group_size; k++)
4641 if (k % ratio == 0)
4643 epilog_stmt = new_phis[k / ratio];
4644 reduction_phi = reduction_phis[k / ratio];
4645 if (double_reduc)
4646 inner_phi = inner_phis[k / ratio];
4649 if (slp_reduc)
4651 gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4653 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4654 /* SLP statements can't participate in patterns. */
4655 gcc_assert (!orig_stmt);
4656 scalar_dest = gimple_assign_lhs (current_stmt);
4659 phis.create (3);
4660 /* Find the loop-closed-use at the loop exit of the original scalar
4661 result. (The reduction result is expected to have two immediate uses -
4662 one at the latch block, and one at the loop exit). */
4663 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4664 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4665 && !is_gimple_debug (USE_STMT (use_p)))
4666 phis.safe_push (USE_STMT (use_p));
4668 /* While we expect to have found an exit_phi because of loop-closed-ssa
4669 form we can end up without one if the scalar cycle is dead. */
4671 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4673 if (outer_loop)
4675 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4676 gphi *vect_phi;
4678 /* FORNOW. Currently not supporting the case that an inner-loop
4679 reduction is not used in the outer-loop (but only outside the
4680 outer-loop), unless it is double reduction. */
4681 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4682 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4683 || double_reduc);
4685 if (double_reduc)
4686 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
4687 else
4688 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4689 if (!double_reduc
4690 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4691 != vect_double_reduction_def)
4692 continue;
4694 /* Handle double reduction:
4696 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4697 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4698 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4699 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
4701 At that point the regular reduction (stmt2 and stmt3) is
4702 already vectorized, as well as the exit phi node, stmt4.
4703 Here we vectorize the phi node of double reduction, stmt1, and
4704 update all relevant statements. */
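              /* In source terms this situation typically comes from a nest
                 such as
                   for (i = 0; i < n; i++)
                     for (j = 0; j < m; j++)
                       s += a[i][j];
                 when the outer loop is being vectorized: s1/s2 are the
                 outer-loop copies of s and s3/s4 the inner-loop ones
                 (an illustrative example).  */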
4706 /* Go through all the uses of s2 to find double reduction phi
4707 node, i.e., stmt1 above. */
4708 orig_name = PHI_RESULT (exit_phi);
4709 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4711 stmt_vec_info use_stmt_vinfo;
4712 stmt_vec_info new_phi_vinfo;
4713 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4714 basic_block bb = gimple_bb (use_stmt);
4715 gimple use;
4717 /* Check that USE_STMT is really double reduction phi
4718 node. */
4719 if (gimple_code (use_stmt) != GIMPLE_PHI
4720 || gimple_phi_num_args (use_stmt) != 2
4721 || bb->loop_father != outer_loop)
4722 continue;
4723 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4724 if (!use_stmt_vinfo
4725 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4726 != vect_double_reduction_def)
4727 continue;
4729 /* Create vector phi node for double reduction:
4730 vs1 = phi <vs0, vs2>
4731 vs1 was created previously in this function by a call to
4732 vect_get_vec_def_for_operand and is stored in
4733 vec_initial_def;
4734 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4735 vs0 is created here. */
4737 /* Create vector phi node. */
4738 vect_phi = create_phi_node (vec_initial_def, bb);
4739 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4740 loop_vec_info_for_loop (outer_loop), NULL);
4741 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4743 /* Create vs0 - initial def of the double reduction phi. */
4744 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4745 loop_preheader_edge (outer_loop));
4746 init_def = get_initial_def_for_reduction (stmt,
4747 preheader_arg, NULL);
4748 vect_phi_init = vect_init_vector (use_stmt, init_def,
4749 vectype, NULL);
4751 /* Update phi node arguments with vs0 and vs2. */
4752 add_phi_arg (vect_phi, vect_phi_init,
4753 loop_preheader_edge (outer_loop),
4754 UNKNOWN_LOCATION);
4755 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4756 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4757 if (dump_enabled_p ())
4759 dump_printf_loc (MSG_NOTE, vect_location,
4760 "created double reduction phi node: ");
4761 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4762 dump_printf (MSG_NOTE, "\n");
4765 vect_phi_res = PHI_RESULT (vect_phi);
4767 /* Replace the use, i.e., set the correct vs1 in the regular
4768 reduction phi node. FORNOW, NCOPIES is always 1, so the
4769 loop is redundant. */
4770 use = reduction_phi;
4771 for (j = 0; j < ncopies; j++)
4773 edge pr_edge = loop_preheader_edge (loop);
4774 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4775 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4781 phis.release ();
4782 if (nested_in_vect_loop)
4784 if (double_reduc)
4785 loop = outer_loop;
4786 else
4787 continue;
4790 phis.create (3);
4791 /* Find the loop-closed-use at the loop exit of the original scalar
4792 result. (The reduction result is expected to have two immediate uses,
4793 one at the latch block, and one at the loop exit). For double
4794 reductions we are looking for exit phis of the outer loop. */
4795 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4797 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4799 if (!is_gimple_debug (USE_STMT (use_p)))
4800 phis.safe_push (USE_STMT (use_p));
4802 else
4804 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4806 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4808 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4810 if (!flow_bb_inside_loop_p (loop,
4811 gimple_bb (USE_STMT (phi_use_p)))
4812 && !is_gimple_debug (USE_STMT (phi_use_p)))
4813 phis.safe_push (USE_STMT (phi_use_p));
4819 FOR_EACH_VEC_ELT (phis, i, exit_phi)
4821 /* Replace the uses: */
4822 orig_name = PHI_RESULT (exit_phi);
4823 scalar_result = scalar_results[k];
4824 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4825 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4826 SET_USE (use_p, scalar_result);
4829 phis.release ();
4834 /* Function vectorizable_reduction.
4836 Check if STMT performs a reduction operation that can be vectorized.
4837 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4838 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4839 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4841 This function also handles reduction idioms (patterns) that have been
4842 recognized in advance during vect_pattern_recog. In this case, STMT may be
4843 of this form:
4844 X = pattern_expr (arg0, arg1, ..., X)
4845    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4846 sequence that had been detected and replaced by the pattern-stmt (STMT).
4848 In some cases of reduction patterns, the type of the reduction variable X is
4849 different than the type of the other arguments of STMT.
4850 In such cases, the vectype that is used when transforming STMT into a vector
4851 stmt is different than the vectype that is used to determine the
4852 vectorization factor, because it consists of a different number of elements
4853 than the actual number of elements that are being operated upon in parallel.
4855 For example, consider an accumulation of shorts into an int accumulator.
4856 On some targets it's possible to vectorize this pattern operating on 8
4857 shorts at a time (hence, the vectype for purposes of determining the
4858 vectorization factor should be V8HI); on the other hand, the vectype that
4859 is used to create the vector form is actually V4SI (the type of the result).
4861 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4862 indicates what is the actual level of parallelism (V8HI in the example), so
4863 that the right vectorization factor would be derived. This vectype
4864 corresponds to the type of arguments to the reduction stmt, and should *NOT*
4865 be used to create the vectorized stmt. The right vectype for the vectorized
4866 stmt is obtained from the type of the result X:
4867 get_vectype_for_scalar_type (TREE_TYPE (X))
4869 This means that, contrary to "regular" reductions (or "regular" stmts in
4870 general), the following equation:
4871 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4872 does *NOT* necessarily hold for reduction patterns. */
4874 bool
4875 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4876 gimple *vec_stmt, slp_tree slp_node)
4878 tree vec_dest;
4879 tree scalar_dest;
4880 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4881 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4882 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4883 tree vectype_in = NULL_TREE;
4884 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4885 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4886 enum tree_code code, orig_code, epilog_reduc_code;
4887 machine_mode vec_mode;
4888 int op_type;
4889 optab optab, reduc_optab;
4890 tree new_temp = NULL_TREE;
4891 tree def;
4892 gimple def_stmt;
4893 enum vect_def_type dt;
4894 gphi *new_phi = NULL;
4895 tree scalar_type;
4896 bool is_simple_use;
4897 gimple orig_stmt;
4898 stmt_vec_info orig_stmt_info;
4899 tree expr = NULL_TREE;
4900 int i;
4901 int ncopies;
4902 int epilog_copies;
4903 stmt_vec_info prev_stmt_info, prev_phi_info;
4904 bool single_defuse_cycle = false;
4905 tree reduc_def = NULL_TREE;
4906 gimple new_stmt = NULL;
4907 int j;
4908 tree ops[3];
4909 bool nested_cycle = false, found_nested_cycle_def = false;
4910 gimple reduc_def_stmt = NULL;
4911 bool double_reduc = false, dummy;
4912 basic_block def_bb;
4913 struct loop * def_stmt_loop, *outer_loop = NULL;
4914 tree def_arg;
4915 gimple def_arg_stmt;
4916 auto_vec<tree> vec_oprnds0;
4917 auto_vec<tree> vec_oprnds1;
4918 auto_vec<tree> vect_defs;
4919 auto_vec<gimple> phis;
4920 int vec_num;
4921 tree def0, def1, tem, op0, op1 = NULL_TREE;
4922 bool first_p = true;
4924 /* In case of reduction chain we switch to the first stmt in the chain, but
4925 we don't update STMT_INFO, since only the last stmt is marked as reduction
4926 and has reduction properties. */
4927 if (GROUP_FIRST_ELEMENT (stmt_info)
4928 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
4930 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4931 first_p = false;
4934 if (nested_in_vect_loop_p (loop, stmt))
4936 outer_loop = loop;
4937 loop = loop->inner;
4938 nested_cycle = true;
4941 /* 1. Is vectorizable reduction? */
4942 /* Not supportable if the reduction variable is used in the loop, unless
4943 it's a reduction chain. */
4944 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4945 && !GROUP_FIRST_ELEMENT (stmt_info))
4946 return false;
4948 /* Reductions that are not used even in an enclosing outer-loop,
4949 are expected to be "live" (used out of the loop). */
4950 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4951 && !STMT_VINFO_LIVE_P (stmt_info))
4952 return false;
4954 /* Make sure it was already recognized as a reduction computation. */
4955 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
4956 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
4957 return false;
4959 /* 2. Has this been recognized as a reduction pattern?
4961 Check if STMT represents a pattern that has been recognized
4962 in earlier analysis stages. For stmts that represent a pattern,
4963 the STMT_VINFO_RELATED_STMT field records the last stmt in
4964 the original sequence that constitutes the pattern. */
4966 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
4967 if (orig_stmt)
4969 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4970 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4971 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4974 /* 3. Check the operands of the operation. The first operands are defined
4975 inside the loop body. The last operand is the reduction variable,
4976 which is defined by the loop-header-phi. */
4978 gcc_assert (is_gimple_assign (stmt));
4980 /* Flatten RHS. */
4981 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4983 case GIMPLE_SINGLE_RHS:
4984 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4985 if (op_type == ternary_op)
4987 tree rhs = gimple_assign_rhs1 (stmt);
4988 ops[0] = TREE_OPERAND (rhs, 0);
4989 ops[1] = TREE_OPERAND (rhs, 1);
4990 ops[2] = TREE_OPERAND (rhs, 2);
4991 code = TREE_CODE (rhs);
4993 else
4994 return false;
4995 break;
4997 case GIMPLE_BINARY_RHS:
4998 code = gimple_assign_rhs_code (stmt);
4999 op_type = TREE_CODE_LENGTH (code);
5000 gcc_assert (op_type == binary_op);
5001 ops[0] = gimple_assign_rhs1 (stmt);
5002 ops[1] = gimple_assign_rhs2 (stmt);
5003 break;
5005 case GIMPLE_TERNARY_RHS:
5006 code = gimple_assign_rhs_code (stmt);
5007 op_type = TREE_CODE_LENGTH (code);
5008 gcc_assert (op_type == ternary_op);
5009 ops[0] = gimple_assign_rhs1 (stmt);
5010 ops[1] = gimple_assign_rhs2 (stmt);
5011 ops[2] = gimple_assign_rhs3 (stmt);
5012 break;
5014 case GIMPLE_UNARY_RHS:
5015 return false;
5017 default:
5018 gcc_unreachable ();
5020   /* The default is that the reduction variable is the last operand in the statement.  */
5021 int reduc_index = op_type - 1;
5023 if (code == COND_EXPR && slp_node)
5024 return false;
5026 scalar_dest = gimple_assign_lhs (stmt);
5027 scalar_type = TREE_TYPE (scalar_dest);
5028 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5029 && !SCALAR_FLOAT_TYPE_P (scalar_type))
5030 return false;
5032 /* Do not try to vectorize bit-precision reductions. */
5033 if ((TYPE_PRECISION (scalar_type)
5034 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5035 return false;
5037 /* All uses but the last are expected to be defined in the loop.
5038 The last use is the reduction variable. In case of nested cycle this
5039 assumption is not true: we use reduc_index to record the index of the
5040 reduction variable. */
5041 for (i = 0; i < op_type - 1; i++)
5043 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5044 if (i == 0 && code == COND_EXPR)
5045 continue;
5047 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5048 &def_stmt, &def, &dt, &tem);
5049 if (!vectype_in)
5050 vectype_in = tem;
5051 gcc_assert (is_simple_use);
5053 if (dt != vect_internal_def
5054 && dt != vect_external_def
5055 && dt != vect_constant_def
5056 && dt != vect_induction_def
5057 && !(dt == vect_nested_cycle && nested_cycle))
5058 return false;
5060 if (dt == vect_nested_cycle)
5062 found_nested_cycle_def = true;
5063 reduc_def_stmt = def_stmt;
5064 reduc_index = i;
5068 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5069 &def_stmt, &def, &dt, &tem);
5070 if (!vectype_in)
5071 vectype_in = tem;
5072 gcc_assert (is_simple_use);
5073 if (!found_nested_cycle_def)
5074 reduc_def_stmt = def_stmt;
5076 if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5077 return false;
5079 if (!(dt == vect_reduction_def
5080 || dt == vect_nested_cycle
5081 || ((dt == vect_internal_def || dt == vect_external_def
5082 || dt == vect_constant_def || dt == vect_induction_def)
5083 && nested_cycle && found_nested_cycle_def)))
5085 /* For pattern recognized stmts, orig_stmt might be a reduction,
5086 but some helper statements for the pattern might not, or
5087 might be COND_EXPRs with reduction uses in the condition. */
5088 gcc_assert (orig_stmt);
5089 return false;
5092 gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5093 !nested_cycle, &dummy, false);
5094 if (orig_stmt)
5095 gcc_assert (tmp == orig_stmt
5096 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5097 else
5098 /* We changed STMT to be the first stmt in reduction chain, hence we
5099 check that in this case the first element in the chain is STMT. */
5100 gcc_assert (stmt == tmp
5101 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5103 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5104 return false;
5106 if (slp_node || PURE_SLP_STMT (stmt_info))
5107 ncopies = 1;
5108 else
5109 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5110 / TYPE_VECTOR_SUBPARTS (vectype_in));
5112 gcc_assert (ncopies >= 1);
5114 vec_mode = TYPE_MODE (vectype_in);
5116 if (code == COND_EXPR)
5118 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
5120 if (dump_enabled_p ())
5121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5122 "unsupported condition in reduction\n");
5124 return false;
5127 else
5129 /* 4. Supportable by target? */
5131 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5132 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5134           /* Shifts and rotates are only supported by vectorizable_shift,
5135 not vectorizable_reduction. */
5136 if (dump_enabled_p ())
5137 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5138 "unsupported shift or rotation.\n");
5139 return false;
5142 /* 4.1. check support for the operation in the loop */
5143 optab = optab_for_tree_code (code, vectype_in, optab_default);
5144 if (!optab)
5146 if (dump_enabled_p ())
5147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5148 "no optab.\n");
5150 return false;
5153 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5155 if (dump_enabled_p ())
5156 dump_printf (MSG_NOTE, "op not supported by target.\n");
5158 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5159 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5160 < vect_min_worthwhile_factor (code))
5161 return false;
5163 if (dump_enabled_p ())
5164 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5167 /* Worthwhile without SIMD support? */
5168 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5169 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5170 < vect_min_worthwhile_factor (code))
5172 if (dump_enabled_p ())
5173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5174 "not worthwhile without SIMD support.\n");
5176 return false;
5180 /* 4.2. Check support for the epilog operation.
5182 If STMT represents a reduction pattern, then the type of the
5183 reduction variable may be different than the type of the rest
5184 of the arguments. For example, consider the case of accumulation
5185 of shorts into an int accumulator; The original code:
5186 S1: int_a = (int) short_a;
5187 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5189 was replaced with:
5190 STMT: int_acc = widen_sum <short_a, int_acc>
5192 This means that:
5193 1. The tree-code that is used to create the vector operation in the
5194 epilog code (that reduces the partial results) is not the
5195 tree-code of STMT, but is rather the tree-code of the original
5196 stmt from the pattern that STMT is replacing. I.e, in the example
5197 above we want to use 'widen_sum' in the loop, but 'plus' in the
5198 epilog.
5199 2. The type (mode) we use to check available target support
5200 for the vector operation to be created in the *epilog*, is
5201 determined by the type of the reduction variable (in the example
5202 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
5203 However the type (mode) we use to check available target support
5204 for the vector operation to be created *inside the loop*, is
5205 determined by the type of the other arguments to STMT (in the
5206 example we'd check this: optab_handler (widen_sum_optab,
5207 vect_short_mode)).
5209 This is contrary to "regular" reductions, in which the types of all
5210 the arguments are the same as the type of the reduction variable.
5211 For "regular" reductions we can therefore use the same vector type
5212 (and also the same tree-code) when generating the epilog code and
5213 when generating the code inside the loop. */
5215 if (orig_stmt)
5217 /* This is a reduction pattern: get the vectype from the type of the
5218 reduction variable, and get the tree-code from orig_stmt. */
5219 orig_code = gimple_assign_rhs_code (orig_stmt);
5220 gcc_assert (vectype_out);
5221 vec_mode = TYPE_MODE (vectype_out);
5223 else
5225       /* Regular reduction: the same vectype and tree-code used for the vector
5226          code inside the loop can also be used for the epilog code.  */
5227 orig_code = code;
5230 if (nested_cycle)
5232 def_bb = gimple_bb (reduc_def_stmt);
5233 def_stmt_loop = def_bb->loop_father;
5234 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5235 loop_preheader_edge (def_stmt_loop));
5236 if (TREE_CODE (def_arg) == SSA_NAME
5237 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5238 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5239 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5240 && vinfo_for_stmt (def_arg_stmt)
5241 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5242 == vect_double_reduction_def)
5243 double_reduc = true;
5246 epilog_reduc_code = ERROR_MARK;
5247 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5249 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5250 optab_default);
5251 if (!reduc_optab)
5253 if (dump_enabled_p ())
5254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5255 "no optab for reduction.\n");
5257 epilog_reduc_code = ERROR_MARK;
5259 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5261 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5262 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5264 if (dump_enabled_p ())
5265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5266 "reduc op not supported by target.\n");
5268 epilog_reduc_code = ERROR_MARK;
5272 else
5274 if (!nested_cycle || double_reduc)
5276 if (dump_enabled_p ())
5277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5278 "no reduc code for scalar code.\n");
5280 return false;
5284 if (double_reduc && ncopies > 1)
5286 if (dump_enabled_p ())
5287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5288 "multiple types in double reduction\n");
5290 return false;
5293   /* In case of widening multiplication by a constant, we update the type
5294 of the constant to be the type of the other operand. We check that the
5295 constant fits the type in the pattern recognition pass. */
5296 if (code == DOT_PROD_EXPR
5297 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5299 if (TREE_CODE (ops[0]) == INTEGER_CST)
5300 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5301 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5302 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5303 else
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5307 "invalid types in dot-prod\n");
5309 return false;
5313 if (!vec_stmt) /* transformation not required. */
5315 if (first_p
5316 && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5317 reduc_index))
5318 return false;
5319 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5320 return true;
5323 /** Transform. **/
5325 if (dump_enabled_p ())
5326 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5328 /* FORNOW: Multiple types are not supported for condition. */
5329 if (code == COND_EXPR)
5330 gcc_assert (ncopies == 1);
5332 /* Create the destination vector */
5333 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5335 /* In case the vectorization factor (VF) is bigger than the number
5336 of elements that we can fit in a vectype (nunits), we have to generate
5337 more than one vector stmt - i.e - we need to "unroll" the
5338 vector stmt by a factor VF/nunits. For more details see documentation
5339 in vectorizable_operation. */
5341 /* If the reduction is used in an outer loop we need to generate
5342 VF intermediate results, like so (e.g. for ncopies=2):
5343 r0 = phi (init, r0)
5344 r1 = phi (init, r1)
5345 r0 = x0 + r0;
5346 r1 = x1 + r1;
5347 (i.e. we generate VF results in 2 registers).
5348 In this case we have a separate def-use cycle for each copy, and therefore
5349 for each copy we get the vector def for the reduction variable from the
5350 respective phi node created for this copy.
5352 Otherwise (the reduction is unused in the loop nest), we can combine
5353 together intermediate results, like so (e.g. for ncopies=2):
5354 r = phi (init, r)
5355 r = x0 + r;
5356 r = x1 + r;
5357 (i.e. we generate VF/2 results in a single register).
5358 In this case for each copy we get the vector def for the reduction variable
5359 from the vectorized reduction operation generated in the previous iteration.
5362 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5364 single_defuse_cycle = true;
5365 epilog_copies = 1;
5367 else
5368 epilog_copies = ncopies;
5370 prev_stmt_info = NULL;
5371 prev_phi_info = NULL;
5372 if (slp_node)
5373 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5374 else
5376 vec_num = 1;
5377 vec_oprnds0.create (1);
5378 if (op_type == ternary_op)
5379 vec_oprnds1.create (1);
5382 phis.create (vec_num);
5383 vect_defs.create (vec_num);
5384 if (!slp_node)
5385 vect_defs.quick_push (NULL_TREE);
5387 for (j = 0; j < ncopies; j++)
5389 if (j == 0 || !single_defuse_cycle)
5391 for (i = 0; i < vec_num; i++)
5393 /* Create the reduction-phi that defines the reduction
5394 operand. */
5395 new_phi = create_phi_node (vec_dest, loop->header);
5396 set_vinfo_for_stmt (new_phi,
5397 new_stmt_vec_info (new_phi, loop_vinfo,
5398 NULL));
5399 if (j == 0 || slp_node)
5400 phis.quick_push (new_phi);
5404 if (code == COND_EXPR)
5406 gcc_assert (!slp_node);
5407 vectorizable_condition (stmt, gsi, vec_stmt,
5408 PHI_RESULT (phis[0]),
5409 reduc_index, NULL);
5410 /* Multiple types are not supported for condition. */
5411 break;
5414 /* Handle uses. */
5415 if (j == 0)
5417 op0 = ops[!reduc_index];
5418 if (op_type == ternary_op)
5420 if (reduc_index == 0)
5421 op1 = ops[2];
5422 else
5423 op1 = ops[1];
5426 if (slp_node)
5427 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5428 slp_node, -1);
5429 else
5431 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5432 stmt, NULL);
5433 vec_oprnds0.quick_push (loop_vec_def0);
5434 if (op_type == ternary_op)
5436 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5437 NULL);
5438 vec_oprnds1.quick_push (loop_vec_def1);
5442 else
5444 if (!slp_node)
5446 enum vect_def_type dt;
5447 gimple dummy_stmt;
5448 tree dummy;
5450 vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5451 &dummy_stmt, &dummy, &dt);
5452 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5453 loop_vec_def0);
5454 vec_oprnds0[0] = loop_vec_def0;
5455 if (op_type == ternary_op)
5457 vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5458 &dummy, &dt);
5459 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5460 loop_vec_def1);
5461 vec_oprnds1[0] = loop_vec_def1;
5465 if (single_defuse_cycle)
5466 reduc_def = gimple_assign_lhs (new_stmt);
5468 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5471 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5473 if (slp_node)
5474 reduc_def = PHI_RESULT (phis[i]);
5475 else
5477 if (!single_defuse_cycle || j == 0)
5478 reduc_def = PHI_RESULT (new_phi);
5481 def1 = ((op_type == ternary_op)
5482 ? vec_oprnds1[i] : NULL);
5483 if (op_type == binary_op)
5485 if (reduc_index == 0)
5486 expr = build2 (code, vectype_out, reduc_def, def0);
5487 else
5488 expr = build2 (code, vectype_out, def0, reduc_def);
5490 else
5492 if (reduc_index == 0)
5493 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5494 else
5496 if (reduc_index == 1)
5497 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5498 else
5499 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5503 new_stmt = gimple_build_assign (vec_dest, expr);
5504 new_temp = make_ssa_name (vec_dest, new_stmt);
5505 gimple_assign_set_lhs (new_stmt, new_temp);
5506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5508 if (slp_node)
5510 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5511 vect_defs.quick_push (new_temp);
5513 else
5514 vect_defs[0] = new_temp;
5517 if (slp_node)
5518 continue;
5520 if (j == 0)
5521 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5522 else
5523 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5525 prev_stmt_info = vinfo_for_stmt (new_stmt);
5526 prev_phi_info = vinfo_for_stmt (new_phi);
5529 /* Finalize the reduction-phi (set its arguments) and create the
5530 epilog reduction code. */
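/* For a plain sum reduction the epilog conceptually extracts and adds up
   the lanes of the final vector of partial sums; e.g. (illustrative) for
   a 4-lane accumulator {s0, s1, s2, s3} it emits code computing
   s = s0 + s1 + s2 + s3, either via a target reduction instruction or a
   log2(nunits) sequence of shifts and adds, and then combines the result
   with the original scalar initial value.  */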
5531 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5533 new_temp = gimple_assign_lhs (*vec_stmt);
5534 vect_defs[0] = new_temp;
5537 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5538 epilog_reduc_code, phis, reduc_index,
5539 double_reduc, slp_node);
5541 return true;
5544 /* Function vect_min_worthwhile_factor.
5546 For a loop where we could vectorize the operation indicated by CODE,
5547 return the minimum vectorization factor that makes it worthwhile
5548 to use generic vectors. */
5549 int
5550 vect_min_worthwhile_factor (enum tree_code code)
5552 switch (code)
5554 case PLUS_EXPR:
5555 case MINUS_EXPR:
5556 case NEGATE_EXPR:
5557 return 4;
5559 case BIT_AND_EXPR:
5560 case BIT_IOR_EXPR:
5561 case BIT_XOR_EXPR:
5562 case BIT_NOT_EXPR:
5563 return 2;
5565 default:
5566 return INT_MAX;
5567 }
5568 }
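/* Illustrative sketch only: how a caller that already knows the loop's
   vectorization factor VF might consult the threshold above before
   committing to generic (emulated) vectors.  The helper name is
   hypothetical and does not exist elsewhere in this file.  */

static bool
example_generic_vectors_worthwhile_p (enum tree_code code, int vf)
{
  /* PLUS/MINUS/NEGATE pay off from VF >= 4, the bitwise operations
     from VF >= 2, and any other operation never does (INT_MAX).  */
  return vf >= vect_min_worthwhile_factor (code);
}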
5571 /* Function vectorizable_induction
5573 Check if PHI performs an induction computation that can be vectorized.
5574 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5575 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5576 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
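/* Illustrative example (not part of the original comment): a simple
   scalar induction

       i_1 = PHI <0(preheader), i_2(latch)>
       i_2 = i_1 + 1;

   is, for 4-element vectors, turned into a vector phi whose initial
   value is {0, 1, 2, 3} and which is advanced by {4, 4, 4, 4} on every
   vector iteration.  */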
5578 bool
5579 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5580 gimple *vec_stmt)
5582 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5583 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5584 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5585 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5586 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5587 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5588 tree vec_def;
5590 gcc_assert (ncopies >= 1);
5591 /* FORNOW. These restrictions should be relaxed. */
5592 if (nested_in_vect_loop_p (loop, phi))
5594 imm_use_iterator imm_iter;
5595 use_operand_p use_p;
5596 gimple exit_phi;
5597 edge latch_e;
5598 tree loop_arg;
5600 if (ncopies > 1)
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "multiple types in nested loop.\n");
5605 return false;
5608 exit_phi = NULL;
5609 latch_e = loop_latch_edge (loop->inner);
5610 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5611 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5613 gimple use_stmt = USE_STMT (use_p);
5614 if (is_gimple_debug (use_stmt))
5615 continue;
5617 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
5619 exit_phi = use_stmt;
5620 break;
5623 if (exit_phi)
5625 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5626 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5627 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5629 if (dump_enabled_p ())
5630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5631 "inner-loop induction only used outside "
5632 "of the outer vectorized loop.\n");
5633 return false;
5638 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5639 return false;
5641 /* FORNOW: SLP not supported. */
5642 if (STMT_SLP_TYPE (stmt_info))
5643 return false;
5645 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5647 if (gimple_code (phi) != GIMPLE_PHI)
5648 return false;
5650 if (!vec_stmt) /* transformation not required. */
5652 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_NOTE, vect_location,
5655 "=== vectorizable_induction ===\n");
5656 vect_model_induction_cost (stmt_info, ncopies);
5657 return true;
5660 /** Transform. **/
5662 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5665 vec_def = get_initial_def_for_induction (phi);
5666 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5667 return true;
5670 /* Function vectorizable_live_operation.
5672 STMT computes a value that is used outside the loop. Check if
5673 it can be supported. */
5675 bool
5676 vectorizable_live_operation (gimple stmt,
5677 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5678 gimple *vec_stmt)
5680 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5681 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5682 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5683 int i;
5684 int op_type;
5685 tree op;
5686 tree def;
5687 gimple def_stmt;
5688 enum vect_def_type dt;
5689 enum tree_code code;
5690 enum gimple_rhs_class rhs_class;
5692 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5694 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5695 return false;
5697 if (!is_gimple_assign (stmt))
5699 if (gimple_call_internal_p (stmt)
5700 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5701 && gimple_call_lhs (stmt)
5702 && loop->simduid
5703 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5704 && loop->simduid
5705 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5707 edge e = single_exit (loop);
5708 basic_block merge_bb = e->dest;
5709 imm_use_iterator imm_iter;
5710 use_operand_p use_p;
5711 tree lhs = gimple_call_lhs (stmt);
5713 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5715 gimple use_stmt = USE_STMT (use_p);
5716 if (gimple_code (use_stmt) == GIMPLE_PHI
5717 && gimple_bb (use_stmt) == merge_bb)
5719 if (vec_stmt)
5721 tree vfm1
5722 = build_int_cst (unsigned_type_node,
5723 loop_vinfo->vectorization_factor - 1);
5724 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5726 return true;
5731 return false;
5734 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5735 return false;
5737 /* FORNOW. CHECKME. */
5738 if (nested_in_vect_loop_p (loop, stmt))
5739 return false;
5741 code = gimple_assign_rhs_code (stmt);
5742 op_type = TREE_CODE_LENGTH (code);
5743 rhs_class = get_gimple_rhs_class (code);
5744 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5745 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5747 /* FORNOW: support only if all uses are invariant. This means
5748 that the scalar operations can remain in place, unvectorized.
5749 The original last scalar value that they compute will be used. */
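/* Illustrative example (hypothetical source, not from this file):

       x = ...;                      <-- defined before the loop
       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + c[i];       <-- vectorized
           t = x + 5;                <-- live: only used after the loop
         }
       use (t);

   All operands of the live statement are loop-invariant or constant, so
   the scalar statement can stay in place and its last computed value is
   what the use after the loop sees.  */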
5751 for (i = 0; i < op_type; i++)
5753 if (rhs_class == GIMPLE_SINGLE_RHS)
5754 op = TREE_OPERAND (gimple_op (stmt, 1), i);
5755 else
5756 op = gimple_op (stmt, i + 1);
5757 if (op
5758 && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5759 &dt))
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5763 "use not simple.\n");
5764 return false;
5767 if (dt != vect_external_def && dt != vect_constant_def)
5768 return false;
5771 /* No transformation is required for the cases we currently support. */
5772 return true;
5775 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
5777 static void
5778 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5780 ssa_op_iter op_iter;
5781 imm_use_iterator imm_iter;
5782 def_operand_p def_p;
5783 gimple ustmt;
5785 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5787 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5789 basic_block bb;
5791 if (!is_gimple_debug (ustmt))
5792 continue;
5794 bb = gimple_bb (ustmt);
5796 if (!flow_bb_inside_loop_p (loop, bb))
5798 if (gimple_debug_bind_p (ustmt))
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_NOTE, vect_location,
5802 "killing debug use\n");
5804 gimple_debug_bind_reset_value (ustmt);
5805 update_stmt (ustmt);
5807 else
5808 gcc_unreachable ();
5815 /* This function builds ni_name = number of iterations. Statements
5816 are emitted on the loop preheader edge. */
5818 static tree
5819 vect_build_loop_niters (loop_vec_info loop_vinfo)
5821 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5822 if (TREE_CODE (ni) == INTEGER_CST)
5823 return ni;
5824 else
5826 tree ni_name, var;
5827 gimple_seq stmts = NULL;
5828 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5830 var = create_tmp_var (TREE_TYPE (ni), "niters");
5831 ni_name = force_gimple_operand (ni, &stmts, false, var);
5832 if (stmts)
5833 gsi_insert_seq_on_edge_immediate (pe, stmts);
5835 return ni_name;
5840 /* This function generates the following statements:
5842 ni_name = number of iterations loop executes
5843 ratio = ni_name / vf
5844 ratio_mult_vf_name = ratio * vf
5846 and places them on the loop preheader edge. */
5848 static void
5849 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5850 tree ni_name,
5851 tree *ratio_mult_vf_name_ptr,
5852 tree *ratio_name_ptr)
5854 tree ni_minus_gap_name;
5855 tree var;
5856 tree ratio_name;
5857 tree ratio_mult_vf_name;
5858 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5859 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5860 tree log_vf;
5862 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
5864 /* If an epilogue loop is required because of data accesses with gaps, we
5865 subtract one iteration from the total number of iterations here for
5866 correct calculation of RATIO. */
5867 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5869 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5870 ni_name,
5871 build_one_cst (TREE_TYPE (ni_name)));
5872 if (!is_gimple_val (ni_minus_gap_name))
5874 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
5875 gimple stmts = NULL;
5876 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
5877 true, var);
5878 gsi_insert_seq_on_edge_immediate (pe, stmts);
5881 else
5882 ni_minus_gap_name = ni_name;
5884 /* Create: ratio = ni >> log2(vf) */
5885 /* ??? As we have ni == number of latch executions + 1, ni could
5886 have overflowed to zero. So avoid computing the ratio based on ni
5887 but compute it using the fact that we know ratio will be at least
5888 one, thus via (ni - vf) >> log2(vf) + 1. */
5889 ratio_name
5890 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
5891 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
5892 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5893 ni_minus_gap_name,
5894 build_int_cst
5895 (TREE_TYPE (ni_name), vf)),
5896 log_vf),
5897 build_int_cst (TREE_TYPE (ni_name), 1));
5898 if (!is_gimple_val (ratio_name))
5900 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
5901 gimple stmts = NULL;
5902 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
5903 gsi_insert_seq_on_edge_immediate (pe, stmts);
5905 *ratio_name_ptr = ratio_name;
5907 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5909 if (ratio_mult_vf_name_ptr)
5911 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5912 ratio_name, log_vf);
5913 if (!is_gimple_val (ratio_mult_vf_name))
5915 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
5916 gimple stmts = NULL;
5917 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
5918 true, var);
5919 gsi_insert_seq_on_edge_immediate (pe, stmts);
5921 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5924 return;
5925 }
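/* Illustrative, self-contained sketch of the overflow-safe ratio
   computation above, written in plain C for a 32-bit unsigned iteration
   count; the function is hypothetical and only meant to show the
   arithmetic.  NITERS is the total number of scalar iterations (latch
   executions + 1, so it may have wrapped to 0) and VF is a power-of-two
   vectorization factor the loop is known to reach.  */

static unsigned int
example_vector_iteration_count (unsigned int niters, unsigned int vf)
{
  unsigned int log_vf = 0;
  while ((1u << log_vf) < vf)
    log_vf++;

  /* "niters >> log_vf" would give 0 if NITERS wrapped around to 0;
     "((niters - vf) >> log_vf) + 1" still yields the correct count,
     e.g. ((10 - 4) >> 2) + 1 == 2 for niters == 10, vf == 4, and
     ((0u - 4) >> 2) + 1 == 0x40000000 for the wrapped case.  */
  return ((niters - vf) >> log_vf) + 1;
}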
5928 /* Function vect_transform_loop.
5930 The analysis phase has determined that the loop is vectorizable.
5931 Vectorize the loop - create vectorized stmts to replace the scalar
5932 stmts in the loop, and update the loop exit condition. */
5934 void
5935 vect_transform_loop (loop_vec_info loop_vinfo)
5937 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5938 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5939 int nbbs = loop->num_nodes;
5940 int i;
5941 tree ratio = NULL;
5942 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5943 bool grouped_store;
5944 bool slp_scheduled = false;
5945 gimple stmt, pattern_stmt;
5946 gimple_seq pattern_def_seq = NULL;
5947 gimple_stmt_iterator pattern_def_si = gsi_none ();
5948 bool transform_pattern_stmt = false;
5949 bool check_profitability = false;
5950 int th;
5951 /* Record number of iterations before we started tampering with the profile. */
5952 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5957 /* If the profile is imprecise, we have a chance to fix it up. */
5958 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5959 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5961 /* Use the more conservative vectorization threshold. If the number
5962 of iterations is constant assume the cost check has been performed
5963 by our caller. If the threshold makes all loops profitable that
5964 run at least the vectorization factor number of times, checking
5965 is pointless, too. */
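/* Illustrative numbers (assumed): with VF == 4, a threshold of 2 or
   less means any loop that reaches the vector code (and therefore runs
   at least 4 scalar iterations) is already profitable, so no runtime
   check is needed; a threshold of 3 or more combined with an unknown
   iteration count makes the test below set CHECK_PROFITABILITY.  */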
5966 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
5967 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5968 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_NOTE, vect_location,
5972 "Profitability threshold is %d loop iterations.\n",
5973 th);
5974 check_profitability = true;
5977 /* Version the loop first, if required, so the profitability check
5978 comes first. */
5980 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5981 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5983 vect_loop_versioning (loop_vinfo, th, check_profitability);
5984 check_profitability = false;
5987 tree ni_name = vect_build_loop_niters (loop_vinfo);
5988 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
5990 /* Peel the loop if there are data refs with unknown alignment.
5991 Only one data ref with unknown alignment is allowed. */
5993 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
5995 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
5996 th, check_profitability);
5997 check_profitability = false;
5998 /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
5999 be re-computed. */
6000 ni_name = NULL_TREE;
6003 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6004 compile time constant), or it is a constant that is not divisible by the
6005 vectorization factor, then an epilog loop needs to be created.
6006 We therefore duplicate the loop: the original loop will be vectorized,
6007 and will compute the first (n/VF) iterations. The second copy of the loop
6008 will remain scalar and will compute the remaining (n%VF) iterations.
6009 (VF is the vectorization factor). */
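/* For example (illustrative, following the style of the example at the
   top of this file), with VF == 4 the transformation conceptually
   produces:

       for (i = 0; i + 4 <= n; i += 4)
         a[i:i+3] = b[i:i+3] + c[i:i+3];   <-- vectorized main loop, n/4 iters
       for (; i < n; i++)
         a[i] = b[i] + c[i];               <-- scalar epilog, n%4 iters

   where a[i:i+3] is informal notation for one 4-element vector access.  */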
6011 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6012 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6014 tree ratio_mult_vf;
6015 if (!ni_name)
6016 ni_name = vect_build_loop_niters (loop_vinfo);
6017 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6018 &ratio);
6019 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6020 th, check_profitability);
6022 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6023 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6024 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6025 else
6027 if (!ni_name)
6028 ni_name = vect_build_loop_niters (loop_vinfo);
6029 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6032 /* 1) Make sure the loop header has exactly two entries
6033 2) Make sure we have a preheader basic block. */
6035 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6037 split_edge (loop_preheader_edge (loop));
6039 /* FORNOW: the vectorizer supports only loops whose body consists
6040 of one basic block (header + empty latch). When the vectorizer
6041 supports more involved loop forms, the order in which the BBs are
6042 traversed will need to be reconsidered. */
6044 for (i = 0; i < nbbs; i++)
6046 basic_block bb = bbs[i];
6047 stmt_vec_info stmt_info;
6049 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6050 gsi_next (&si))
6052 gphi *phi = si.phi ();
6053 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_NOTE, vect_location,
6056 "------>vectorizing phi: ");
6057 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6058 dump_printf (MSG_NOTE, "\n");
6060 stmt_info = vinfo_for_stmt (phi);
6061 if (!stmt_info)
6062 continue;
6064 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6065 vect_loop_kill_debug_uses (loop, phi);
6067 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6068 && !STMT_VINFO_LIVE_P (stmt_info))
6069 continue;
6071 if (STMT_VINFO_VECTYPE (stmt_info)
6072 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6073 != (unsigned HOST_WIDE_INT) vectorization_factor)
6074 && dump_enabled_p ())
6075 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6077 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6081 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6085 pattern_stmt = NULL;
6086 for (gimple_stmt_iterator si = gsi_start_bb (bb);
6087 !gsi_end_p (si) || transform_pattern_stmt;)
6089 bool is_store;
6091 if (transform_pattern_stmt)
6092 stmt = pattern_stmt;
6093 else
6095 stmt = gsi_stmt (si);
6096 /* During vectorization remove existing clobber stmts. */
6097 if (gimple_clobber_p (stmt))
6099 unlink_stmt_vdef (stmt);
6100 gsi_remove (&si, true);
6101 release_defs (stmt);
6102 continue;
6106 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_NOTE, vect_location,
6109 "------>vectorizing statement: ");
6110 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6111 dump_printf (MSG_NOTE, "\n");
6114 stmt_info = vinfo_for_stmt (stmt);
6116 /* vector stmts created in the outer-loop during vectorization of
6117 stmts in an inner-loop may not have a stmt_info, and do not
6118 need to be vectorized. */
6119 if (!stmt_info)
6121 gsi_next (&si);
6122 continue;
6125 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6126 vect_loop_kill_debug_uses (loop, stmt);
6128 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6129 && !STMT_VINFO_LIVE_P (stmt_info))
6131 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6132 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6133 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6134 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6136 stmt = pattern_stmt;
6137 stmt_info = vinfo_for_stmt (stmt);
6139 else
6141 gsi_next (&si);
6142 continue;
6145 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6146 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6147 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6148 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6149 transform_pattern_stmt = true;
6151 /* If pattern statement has def stmts, vectorize them too. */
6152 if (is_pattern_stmt_p (stmt_info))
6154 if (pattern_def_seq == NULL)
6156 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6157 pattern_def_si = gsi_start (pattern_def_seq);
6159 else if (!gsi_end_p (pattern_def_si))
6160 gsi_next (&pattern_def_si);
6161 if (pattern_def_seq != NULL)
6163 gimple pattern_def_stmt = NULL;
6164 stmt_vec_info pattern_def_stmt_info = NULL;
6166 while (!gsi_end_p (pattern_def_si))
6168 pattern_def_stmt = gsi_stmt (pattern_def_si);
6169 pattern_def_stmt_info
6170 = vinfo_for_stmt (pattern_def_stmt);
6171 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6172 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6173 break;
6174 gsi_next (&pattern_def_si);
6177 if (!gsi_end_p (pattern_def_si))
6179 if (dump_enabled_p ())
6181 dump_printf_loc (MSG_NOTE, vect_location,
6182 "==> vectorizing pattern def "
6183 "stmt: ");
6184 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6185 pattern_def_stmt, 0);
6186 dump_printf (MSG_NOTE, "\n");
6189 stmt = pattern_def_stmt;
6190 stmt_info = pattern_def_stmt_info;
6192 else
6194 pattern_def_si = gsi_none ();
6195 transform_pattern_stmt = false;
6198 else
6199 transform_pattern_stmt = false;
6202 if (STMT_VINFO_VECTYPE (stmt_info))
6204 unsigned int nunits
6205 = (unsigned int)
6206 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6207 if (!STMT_SLP_TYPE (stmt_info)
6208 && nunits != (unsigned int) vectorization_factor
6209 && dump_enabled_p ())
6210 /* For SLP, VF is set according to the unrolling factor, and not
6211 to the vector size, hence for SLP this message is not valid. */
6212 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6215 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6216 reached. */
6217 if (STMT_SLP_TYPE (stmt_info))
6219 if (!slp_scheduled)
6221 slp_scheduled = true;
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_NOTE, vect_location,
6225 "=== scheduling SLP instances ===\n");
6227 vect_schedule_slp (loop_vinfo, NULL);
6230 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
6231 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6233 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6235 pattern_def_seq = NULL;
6236 gsi_next (&si);
6238 continue;
6242 /* -------- vectorize statement ------------ */
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6246 grouped_store = false;
6247 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6248 if (is_store)
6250 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6252 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6253 interleaving chain was completed - free all the stores in
6254 the chain. */
6255 gsi_next (&si);
6256 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6258 else
6260 /* Free the attached stmt_vec_info and remove the stmt. */
6261 gimple store = gsi_stmt (si);
6262 free_stmt_vec_info (store);
6263 unlink_stmt_vdef (store);
6264 gsi_remove (&si, true);
6265 release_defs (store);
6268 /* Stores can only appear at the end of pattern statements. */
6269 gcc_assert (!transform_pattern_stmt);
6270 pattern_def_seq = NULL;
6272 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6274 pattern_def_seq = NULL;
6275 gsi_next (&si);
6277 } /* stmts in BB */
6278 } /* BBs in loop */
6280 slpeel_make_loop_iterate_ntimes (loop, ratio);
6282 /* Reduce loop iterations by the vectorization factor. */
6283 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6284 expected_iterations / vectorization_factor);
6285 loop->nb_iterations_upper_bound
6286 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6287 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6288 && loop->nb_iterations_upper_bound != 0)
6289 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
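/* E.g. (illustrative numbers): an upper bound of 1003 scalar iterations
   with a vectorization factor of 4 becomes 1003/4 == 250, and 249 when
   an epilogue is peeled off because of gaps; the iteration estimate
   below is adjusted the same way.  */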
6290 if (loop->any_estimate)
6292 loop->nb_iterations_estimate
6293 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6294 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6295 && loop->nb_iterations_estimate != 0)
6296 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6299 if (dump_enabled_p ())
6301 dump_printf_loc (MSG_NOTE, vect_location,
6302 "LOOP VECTORIZED\n");
6303 if (loop->inner)
6304 dump_printf_loc (MSG_NOTE, vect_location,
6305 "OUTER LOOP VECTORIZED\n");
6306 dump_printf (MSG_NOTE, "\n");