gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "tree-ssa.h"
  32 #include "tree-pass.h"
  33 #include "cfgloop.h"
  34 #include "expr.h"
  35 #include "recog.h"
  36 #include "optabs.h"
  37 #include "params.h"
  38 #include "diagnostic-core.h"
  39 #include "tree-chrec.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-vectorizer.h"
  42 #include "target.h"
  43
  44 /* Loop Vectorization Pass.
  45
  46    This pass tries to vectorize loops.
  47
  48    For example, the vectorizer transforms the following simple loop:
  49
  50         short a[N]; short b[N]; short c[N]; int i;
  51
  52         for (i=0; i<N; i++){
  53           a[i] = b[i] + c[i];
  54         }
  55
  56    as if it was manually vectorized by rewriting the source code into:
  57
  58         typedef int __attribute__((mode(V8HI))) v8hi;
  59         short a[N];  short b[N]; short c[N];   int i;
  60         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  61         v8hi va, vb, vc;
  62
  63         for (i=0; i<N/8; i++){
  64           vb = pb[i];
  65           vc = pc[i];
  66           va = vb + vc;
  67           pa[i] = va;
  68         }
  69
  70         The main entry to this pass is vectorize_loops(), in which
  71    the vectorizer applies a set of analyses on a given set of loops,
  72    followed by the actual vectorization transformation for the loops that
  73    had successfully passed the analysis phase.
  74         Throughout this pass we make a distinction between two types of
  75    data: scalars (which are represented by SSA_NAMES), and memory references
  76    ("data-refs").  These two types of data require different handling both
  77    during analysis and transformation. The types of data-refs that the
   78    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  79    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  80    accesses are required to have a simple (consecutive) access pattern.
  81
  82    Analysis phase:
  83    ===============
  84         The driver for the analysis phase is vect_analyze_loop().
  85    It applies a set of analyses, some of which rely on the scalar evolution
  86    analyzer (scev) developed by Sebastian Pop.
  87
  88         During the analysis phase the vectorizer records some information
  89    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  90    loop, as well as general information about the loop as a whole, which is
  91    recorded in a "loop_vec_info" struct attached to each loop.
  92
  93    Transformation phase:
  94    =====================
  95         The loop transformation phase scans all the stmts in the loop, and
  96    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  97    the loop that needs to be vectorized.  It inserts the vector code sequence
  98    just before the scalar stmt S, and records a pointer to the vector code
  99    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 100    attached to S).  This pointer will be used for the vectorization of following
 101    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 102    otherwise, we rely on dead code elimination for removing it.
 103
 104         For example, say stmt S1 was vectorized into stmt VS1:
 105
 106    VS1: vb = px[i];
 107    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 108    S2:  a = b;
 109
 110    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 111    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 112    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 113    resulting sequence would be:
 114
 115    VS1: vb = px[i];
 116    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 117    VS2: va = vb;
 118    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 119
 120         Operands that are not SSA_NAMEs, are data-refs that appear in
 121    load/store operations (like 'x[i]' in S1), and are handled differently.
 122
 123    Target modeling:
 124    =================
 125         Currently the only target specific information that is used is the
 126    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 127    Targets that can support different sizes of vectors, for now will need
 128    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 129    flexibility will be added in the future.
 130
  131         Since we only vectorize operations whose vector form can be
  132    expressed using existing tree codes, to verify that an operation is
  133    supported, the vectorizer checks the relevant optab at the relevant
  134    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 135    the value found is CODE_FOR_nothing, then there's no target support, and
 136    we can't vectorize the stmt.
 137
 138    For additional information on this project see:
 139    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 140 */
 141
  142 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); /* Forward declaration.  */
 143
  144 /* Function vect_determine_vectorization_factor
  145
  146    Determine the vectorization factor (VF).  VF is the number of data elements
  147    that are operated upon in parallel in a single iteration of the vectorized
  148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
  149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
  150    elements can fit in a single vector register.
  151
  152    VF is set to the largest number of vector subparts (nunits) found among
  153    the vector types of the relevant PHIs and of the smallest scalar type used
  154    by each analyzed stmt.  Within one stmt, the stmt's own vector type and the
  155    vector type of its smallest scalar type must have the same mode size.
  156
  157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
  158    original loop:
  159         for (i=0; i<N; i++){
  160           a[i] = b[i] + c[i];
  161         }
  162
  163    vectorized loop:
  164         for (i=0; i<N; i+=VF){
  165           a[i:VF] = b[i:VF] + c[i:VF];
  166         }

         On success, store VF in LOOP_VINFO_VECT_FACTOR (LOOP_VINFO) and return
         true; the vector type chosen for each analyzed PHI or stmt is recorded
         in its STMT_VINFO_VECTYPE.  Return false when no vector type exists for
         a scalar type, when a stmt has no lhs or its type already has a vector
         mode, when the two vector types of a stmt differ in size, or when the
         resulting VF is <= 1.
  167 */
  168
  169 static bool
  170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
  171 {
  172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  174   int nbbs = loop->num_nodes;
  175   gimple_stmt_iterator si;
  176   unsigned int vectorization_factor = 0;
  177   tree scalar_type;
  178   gimple phi;
  179   tree vectype;
  180   unsigned int nunits;
  181   stmt_vec_info stmt_info;
  182   int i;
  183   HOST_WIDE_INT dummy;
  184   gimple stmt, pattern_stmt = NULL;
  185   gimple_seq pattern_def_seq = NULL;
  186   gimple_stmt_iterator pattern_def_si = gsi_none ();
  187   bool analyze_pattern_stmt = false;
  188
  189   if (dump_enabled_p ())
  190     dump_printf_loc (MSG_NOTE, vect_location,
  191                      "=== vect_determine_vectorization_factor ===\n");
  192
        /* Visit the basic blocks of the loop one by one; within each block first
           the PHIs, then the ordinary stmts.  */
  193   for (i = 0; i < nbbs; i++)
  194     {
  195       basic_block bb = bbs[i];
  196
            /* PHIs: a relevant PHI gets the vector type of its result's type, and
               that type's nunits is a candidate for VF.  */
  197       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
  198         {
  199           phi = gsi_stmt (si);
  200           stmt_info = vinfo_for_stmt (phi);
  201           if (dump_enabled_p ())
  202             {
  203               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
  204               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
  205               dump_printf (MSG_NOTE, "\n");
  206             }
  207
  208           gcc_assert (stmt_info);
  209
  210           if (STMT_VINFO_RELEVANT_P (stmt_info))
  211             {
  212               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
  213               scalar_type = TREE_TYPE (PHI_RESULT (phi));
  214
  215               if (dump_enabled_p ())
  216                 {
  217                   dump_printf_loc (MSG_NOTE, vect_location,
  218                                    "get vectype for scalar type:  ");
  219                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  220                   dump_printf (MSG_NOTE, "\n");
  221                 }
  222
  223               vectype = get_vectype_for_scalar_type (scalar_type);
  224               if (!vectype)
  225                 {
  226                   if (dump_enabled_p ())
  227                     {
  228                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  229                                        "not vectorized: unsupported "
  230                                        "data-type ");
  231                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  232                                          scalar_type);
  233                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  234                     }
  235                   return false;
  236                 }
  237               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  238
  239               if (dump_enabled_p ())
  240                 {
  241                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  242                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  243                   dump_printf (MSG_NOTE, "\n");
  244                 }
  245
  246               nunits = TYPE_VECTOR_SUBPARTS (vectype);
  247               if (dump_enabled_p ())
  248                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
  249                                  nunits);
  250
  251               if (!vectorization_factor
  252                   || (nunits > vectorization_factor))
  253                 vectorization_factor = nunits;
  254             }
  255         }
  256
            /* Stmts.  SI is advanced explicitly inside the body: it stays on the
               same stmt while ANALYZE_PATTERN_STMT is set or PATTERN_DEF_SI still
               points at a pattern def stmt, so that those are analyzed too.  */
  257       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
  258         {
  259           tree vf_vectype;
  260
  261           if (analyze_pattern_stmt)
  262             stmt = pattern_stmt;
  263           else
  264             stmt = gsi_stmt (si);
  265
  266           stmt_info = vinfo_for_stmt (stmt);
  267
  268           if (dump_enabled_p ())
  269             {
  270               dump_printf_loc (MSG_NOTE, vect_location,
  271                                "==> examining statement: ");
  272               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  273               dump_printf (MSG_NOTE, "\n");
  274             }
  275
  276           gcc_assert (stmt_info);
  277
  278           /* Skip stmts which do not need to be vectorized, unless the stmt
                   is part of a pattern whose related pattern stmt is relevant or
                   live; in that case the pattern stmt is analyzed in its place.  */
  279           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
  280                && !STMT_VINFO_LIVE_P (stmt_info))
  281               || gimple_clobber_p (stmt))
  282             {
  283               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  284                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  285                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  286                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  287                 {
  288                   stmt = pattern_stmt;
  289                   stmt_info = vinfo_for_stmt (pattern_stmt);
  290                   if (dump_enabled_p ())
  291                     {
  292                       dump_printf_loc (MSG_NOTE, vect_location,
  293                                        "==> examining pattern statement: ");
  294                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  295                       dump_printf (MSG_NOTE, "\n");
  296                     }
  297                 }
  298               else
  299                 {
  300                   if (dump_enabled_p ())
  301                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
  302                   gsi_next (&si);
  303                   continue;
  304                 }
  305             }
                /* This stmt is analyzed now; if it also has a relevant or live
                   pattern stmt, that one is analyzed on the next iteration.  */
  306           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  307                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  308                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  309                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  310             analyze_pattern_stmt = true;
  311
  312           /* If a pattern statement has def stmts, analyze them too.  */
  313           if (is_pattern_stmt_p (stmt_info))
  314             {
                    /* First visit: fetch the def sequence and start at its head.
                       Later visits: step past the def stmt handled last time.  */
  315               if (pattern_def_seq == NULL)
  316                 {
  317                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
  318                   pattern_def_si = gsi_start (pattern_def_seq);
  319                 }
  320               else if (!gsi_end_p (pattern_def_si))
  321                 gsi_next (&pattern_def_si);
  322               if (pattern_def_seq != NULL)
  323                 {
  324                   gimple pattern_def_stmt = NULL;
  325                   stmt_vec_info pattern_def_stmt_info = NULL;
  326
                        /* Find the next relevant or live def stmt.  */
  327                   while (!gsi_end_p (pattern_def_si))
  328                     {
  329                       pattern_def_stmt = gsi_stmt (pattern_def_si);
  330                       pattern_def_stmt_info
  331                         = vinfo_for_stmt (pattern_def_stmt);
  332                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
  333                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
  334                         break;
  335                       gsi_next (&pattern_def_si);
  336                     }
  337
                        /* A def stmt was found: analyze it in this iteration.
                           Otherwise the def sequence is exhausted and STMT itself
                           is analyzed below.  */
  338                   if (!gsi_end_p (pattern_def_si))
  339                     {
  340                       if (dump_enabled_p ())
  341                         {
  342                           dump_printf_loc (MSG_NOTE, vect_location,
  343                                            "==> examining pattern def stmt: ");
  344                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
  345                                             pattern_def_stmt, 0);
  346                           dump_printf (MSG_NOTE, "\n");
  347                         }
  348
  349                       stmt = pattern_def_stmt;
  350                       stmt_info = pattern_def_stmt_info;
  351                     }
  352                   else
  353                     {
  354                       pattern_def_si = gsi_none ();
  355                       analyze_pattern_stmt = false;
  356                     }
  357                 }
  358               else
  359                 analyze_pattern_stmt = false;
  360             }
  361
                /* A stmt without a lhs is rejected.  */
  362           if (gimple_get_lhs (stmt) == NULL_TREE)
  363             {
  364               if (dump_enabled_p ())
  365                 {
  366                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  367                                    "not vectorized: irregular stmt.");
  368                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  369                                     0);
  370                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  371                 }
  372               return false;
  373             }
  374
                /* A stmt whose type already has a vector mode is rejected.  */
  375           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
  376             {
  377               if (dump_enabled_p ())
  378                 {
  379                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  380                                    "not vectorized: vector stmt in loop:");
  381                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
  382                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  383                 }
  384               return false;
  385             }
  386
  387           if (STMT_VINFO_VECTYPE (stmt_info))
  388             {
  389               /* The only case when a vectype had been already set is for stmts
  390                  that contain a dataref, or for "pattern-stmts" (stmts
  391                  generated by the vectorizer to represent/replace a certain
  392                  idiom).  */
  393               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
  394                           || is_pattern_stmt_p (stmt_info)
  395                           || !gsi_end_p (pattern_def_si));
  396               vectype = STMT_VINFO_VECTYPE (stmt_info);
  397             }
  398           else
  399             {
                    /* No vectype yet: derive it from the type of the lhs and
                       record it for this stmt.  */
  400               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
  401               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
  402               if (dump_enabled_p ())
  403                 {
  404                   dump_printf_loc (MSG_NOTE, vect_location,
  405                                    "get vectype for scalar type:  ");
  406                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  407                   dump_printf (MSG_NOTE, "\n");
  408                 }
  409               vectype = get_vectype_for_scalar_type (scalar_type);
  410               if (!vectype)
  411                 {
  412                   if (dump_enabled_p ())
  413                     {
  414                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  415                                        "not vectorized: unsupported "
  416                                        "data-type ");
  417                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  418                                          scalar_type);
  419                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  420                     }
  421                   return false;
  422                 }
  423
  424               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  425
  426               if (dump_enabled_p ())
  427                 {
  428                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  429                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  430                   dump_printf (MSG_NOTE, "\n");
  431                 }
  432             }
  433
  434           /* The vectorization factor is according to the smallest
  435              scalar type (or the largest vector size, but we only
  436              support one vector size per loop).  */
  437           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
  438                                                        &dummy);
  439           if (dump_enabled_p ())
  440             {
  441               dump_printf_loc (MSG_NOTE, vect_location,
  442                                "get vectype for scalar type:  ");
  443               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  444               dump_printf (MSG_NOTE, "\n");
  445             }
  446           vf_vectype = get_vectype_for_scalar_type (scalar_type);
  447           if (!vf_vectype)
  448             {
  449               if (dump_enabled_p ())
  450                 {
  451                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  452                                    "not vectorized: unsupported data-type ");
  453                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  454                                      scalar_type);
  455                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  456                 }
  457               return false;
  458             }
  459
                /* The stmt's own vector type and the VF-determining vector type
                   must have the same size.  */
  460           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
  461                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
  462             {
  463               if (dump_enabled_p ())
  464                 {
  465                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  466                                    "not vectorized: different sized vector "
  467                                    "types in statement, ");
  468                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  469                                      vectype);
  470                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  471                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  472                                      vf_vectype);
  473                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  474                 }
  475               return false;
  476             }
  477
  478           if (dump_enabled_p ())
  479             {
  480               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  481               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
  482               dump_printf (MSG_NOTE, "\n");
  483             }
  484
                /* Keep the largest nunits seen so far as the VF.  */
  485           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
  486           if (dump_enabled_p ())
  487             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
  488           if (!vectorization_factor
  489               || (nunits > vectorization_factor))
  490             vectorization_factor = nunits;
  491
                /* Advance to the next stmt only once no pattern stmt and no
                   pattern def stmt is pending for the current one.  */
  492           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  493             {
  494               pattern_def_seq = NULL;
  495               gsi_next (&si);
  496             }
  497         }
  498     }
  499
  500   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
  501   if (dump_enabled_p ())
  502     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
  503                      vectorization_factor);
        /* A factor of 0 (nothing analyzed) or 1 is treated as a failure.  */
  504   if (vectorization_factor <= 1)
  505     {
  506       if (dump_enabled_p ())
  507         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  508                          "not vectorized: unsupported data-type\n");
  509       return false;
  510     }
  511   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  512
  513   return true;
  514 }
 515
 516
 517 /* Function vect_is_simple_iv_evolution.
 518
 519    FORNOW: A simple evolution of an induction variables in the loop is
 520    considered a polynomial evolution.  */
 521
 522 static bool
 523 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 524                              tree * step)
 525 {
 526   tree init_expr;
 527   tree step_expr;
 528   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 529   basic_block bb;
 530
 531   /* When there is no evolution in this loop, the evolution function
 532      is not "simple".  */
 533   if (evolution_part == NULL_TREE)
 534     return false;
 535
 536   /* When the evolution is a polynomial of degree >= 2
 537      the evolution function is not "simple".  */
 538   if (tree_is_chrec (evolution_part))
 539     return false;
 540
 541   step_expr = evolution_part;
 542   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 543
 544   if (dump_enabled_p ())
 545     {
 546       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 547       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 548       dump_printf (MSG_NOTE, ",  init: ");
 549       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 550       dump_printf (MSG_NOTE, "\n");
 551     }
 552
 553   *init = init_expr;
 554   *step = step_expr;
 555
 556   if (TREE_CODE (step_expr) != INTEGER_CST
 557       && (TREE_CODE (step_expr) != SSA_NAME
 558           || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
 559               && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
 560           || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
 561               && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
 562                   || !flag_associative_math)))
 563       && (TREE_CODE (step_expr) != REAL_CST
 564           || !flag_associative_math))
 565     {
 566       if (dump_enabled_p ())
 567         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 568                          "step unknown.\n");
 569       return false;
 570     }
 571
 572   return true;
 573 }
 574
 575 /* Function vect_analyze_scalar_cycles_1.
 576
 577    Examine the cross iteration def-use cycles of scalar variables
 578    in LOOP.  LOOP_VINFO represents the loop that is now being
 579    considered for vectorization (can be LOOP, or an outer-loop
 580    enclosing LOOP).  */
 581
 582 static void
 583 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 584 {
 585   basic_block bb = loop->header;
 586   tree init, step;
 587   vec<gimple> worklist;
 588   worklist.create (64);
 589   gimple_stmt_iterator gsi;
 590   bool double_reduc;
 591
 592   if (dump_enabled_p ())
 593     dump_printf_loc (MSG_NOTE, vect_location,
 594                      "=== vect_analyze_scalar_cycles ===\n");
 595
 596   /* First - identify all inductions.  Reduction detection assumes that all the
 597      inductions have been identified, therefore, this order must not be
 598      changed.  */
 599   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 600     {
 601       gimple phi = gsi_stmt (gsi);
 602       tree access_fn = NULL;
 603       tree def = PHI_RESULT (phi);
 604       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 605
 606       if (dump_enabled_p ())
 607         {
 608           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 609           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 610           dump_printf (MSG_NOTE, "\n");
 611         }
 612
 613       /* Skip virtual phi's.  The data dependences that are associated with
 614          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 615       if (virtual_operand_p (def))
 616         continue;
 617
 618       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 619
 620       /* Analyze the evolution function.  */
 621       access_fn = analyze_scalar_evolution (loop, def);
 622       if (access_fn)
 623         {
 624           STRIP_NOPS (access_fn);
 625           if (dump_enabled_p ())
 626             {
 627               dump_printf_loc (MSG_NOTE, vect_location,
 628                                "Access function of PHI: ");
 629               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 630               dump_printf (MSG_NOTE, "\n");
 631             }
 632           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 633             = evolution_part_in_loop_num (access_fn, loop->num);
 634         }
 635
 636       if (!access_fn
 637           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 638           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 639               && TREE_CODE (step) != INTEGER_CST))
 640         {
 641           worklist.safe_push (phi);
 642           continue;
 643         }
 644
 645       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 646
 647       if (dump_enabled_p ())
 648         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 649       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 650     }
 651
 652
 653   /* Second - identify all reductions and nested cycles.  */
 654   while (worklist.length () > 0)
 655     {
 656       gimple phi = worklist.pop ();
 657       tree def = PHI_RESULT (phi);
 658       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 659       gimple reduc_stmt;
 660       bool nested_cycle;
 661
 662       if (dump_enabled_p ())
 663         {
 664           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 665           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 666           dump_printf (MSG_NOTE, "\n");
 667         }
 668
 669       gcc_assert (!virtual_operand_p (def)
 670                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 671
 672       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 673       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 674                                                 &double_reduc);
 675       if (reduc_stmt)
 676         {
 677           if (double_reduc)
 678             {
 679               if (dump_enabled_p ())
 680                 dump_printf_loc (MSG_NOTE, vect_location,
 681                                  "Detected double reduction.\n");
 682
 683               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 684               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 685                                                     vect_double_reduction_def;
 686             }
 687           else
 688             {
 689               if (nested_cycle)
 690                 {
 691                   if (dump_enabled_p ())
 692                     dump_printf_loc (MSG_NOTE, vect_location,
 693                                      "Detected vectorizable nested cycle.\n");
 694
 695                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 696                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 697                                                              vect_nested_cycle;
 698                 }
 699               else
 700                 {
 701                   if (dump_enabled_p ())
 702                     dump_printf_loc (MSG_NOTE, vect_location,
 703                                      "Detected reduction.\n");
 704
 705                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 706                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 707                                                            vect_reduction_def;
 708                   /* Store the reduction cycles for possible vectorization in
 709                      loop-aware SLP.  */
 710                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 711                 }
 712             }
 713         }
 714       else
 715         if (dump_enabled_p ())
 716           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 717                            "Unknown def-use cycle pattern.\n");
 718     }
 719
 720   worklist.release ();
 721 }
 722
 723
 724 /* Function vect_analyze_scalar_cycles.
 725
 726    Examine the cross iteration def-use cycles of scalar variables, by
 727    analyzing the loop-header PHIs of scalar variables.  Classify each
 728    cycle as one of the following: invariant, induction, reduction, unknown.
 729    We do that for the loop represented by LOOP_VINFO, and also to its
 730    inner-loop, if exists.
 731    Examples for scalar cycles:
 732
 733    Example1: reduction:
 734
 735               loop1:
 736               for (i=0; i<N; i++)
 737                  sum += a[i];
 738
 739    Example2: induction:
 740
 741               loop2:
 742               for (i=0; i<N; i++)
 743                  a[i] = i;  */
 744
 745 static void
 746 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 747 {
 748   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 749
 750   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 751
 752   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 753      Reductions in such inner-loop therefore have different properties than
 754      the reductions in the nest that gets vectorized:
 755      1. When vectorized, they are executed in the same order as in the original
 756         scalar loop, so we can't change the order of computation when
 757         vectorizing them.
 758      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 759         current checks are too strict.  */
 760
 761   if (loop->inner)
 762     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 763 }
 764
 765 /* Function vect_get_loop_niters.
 766
 767    Determine how many iterations the loop is executed.
 768    If an expression that represents the number of iterations
 769    can be constructed, place it in NUMBER_OF_ITERATIONS.
 770    Return the loop exit condition.  */
 771
 772 static gimple
 773 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 774 {
 775   tree niters;
 776
 777   if (dump_enabled_p ())
 778     dump_printf_loc (MSG_NOTE, vect_location,
 779                      "=== get_loop_niters ===\n");
 780   niters = number_of_exit_cond_executions (loop);
 781
 782   if (niters != NULL_TREE
 783       && niters != chrec_dont_know)
 784     {
 785       *number_of_iterations = niters;
 786
 787       if (dump_enabled_p ())
 788         {
 789           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
 790           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
 791           dump_printf (MSG_NOTE, "\n");
 792         }
 793     }
 794
 795   return get_loop_exit_condition (loop);
 796 }
 797
 798
 799 /* Function bb_in_loop_p
 800
 801    Used as predicate for dfs order traversal of the loop bbs.  */
 802
 803 static bool
 804 bb_in_loop_p (const_basic_block bb, const void *data)
 805 {
 806   const struct loop *const loop = (const struct loop *)data;
 807   if (flow_bb_inside_loop_p (loop, bb))
 808     return true;
 809   return false;
 810 }
 811
 812
 813 /* Function new_loop_vec_info.
 814
 815    Create and initialize a new loop_vec_info struct for LOOP, as well as
 816    stmt_vec_info structs for all the stmts in LOOP.  */
 817
 818 static loop_vec_info
 819 new_loop_vec_info (struct loop *loop)
 820 {
 821   loop_vec_info res;
 822   basic_block *bbs;
 823   gimple_stmt_iterator si;
 824   unsigned int i, nbbs;
 825
 826   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 827   LOOP_VINFO_LOOP (res) = loop;
 828
 829   bbs = get_loop_body (loop);
 830
 831   /* Create/Update stmt_info for all stmts in the loop.  */
 832   for (i = 0; i < loop->num_nodes; i++)
 833     {
 834       basic_block bb = bbs[i];
 835
 836       /* BBs in a nested inner-loop will have been already processed (because
 837          we will have called vect_analyze_loop_form for any nested inner-loop).
 838          Therefore, for stmts in an inner-loop we just want to update the
 839          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 840          loop_info of the outer-loop we are currently considering to vectorize
 841          (instead of the loop_info of the inner-loop).
 842          For stmts in other BBs we need to create a stmt_info from scratch.  */
 843       if (bb->loop_father != loop)
 844         {
 845           /* Inner-loop bb.  */
 846           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 847           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 848             {
 849               gimple phi = gsi_stmt (si);
 850               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 851               loop_vec_info inner_loop_vinfo =
 852                 STMT_VINFO_LOOP_VINFO (stmt_info);
 853               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 854               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 855             }
 856           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 857            {
 858               gimple stmt = gsi_stmt (si);
 859               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 860               loop_vec_info inner_loop_vinfo =
 861                  STMT_VINFO_LOOP_VINFO (stmt_info);
 862               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 863               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 864            }
 865         }
 866       else
 867         {
 868           /* bb in current nest.  */
 869           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 870             {
 871               gimple phi = gsi_stmt (si);
 872               gimple_set_uid (phi, 0);
 873               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 874             }
 875
 876           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 877             {
 878               gimple stmt = gsi_stmt (si);
 879               gimple_set_uid (stmt, 0);
 880               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 881             }
 882         }
 883     }
 884
 885   /* CHECKME: We want to visit all BBs before their successors (except for
 886      latch blocks, for which this assertion wouldn't hold).  In the simple
 887      case of the loop forms we allow, a dfs order of the BBs would the same
 888      as reversed postorder traversal, so we are safe.  */
 889
 890    free (bbs);
 891    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 892    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 893                               bbs, loop->num_nodes, loop);
 894    gcc_assert (nbbs == loop->num_nodes);
 895
 896   LOOP_VINFO_BBS (res) = bbs;
 897   LOOP_VINFO_NITERS (res) = NULL;
 898   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 899   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 900   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 901   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 902   LOOP_VINFO_VECT_FACTOR (res) = 0;
 903   LOOP_VINFO_LOOP_NEST (res).create (3);
 904   LOOP_VINFO_DATAREFS (res).create (10);
 905   LOOP_VINFO_DDRS (res).create (10 * 10);
 906   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 907   LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
 908              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 909   LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
 910              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 911   LOOP_VINFO_GROUPED_STORES (res).create (10);
 912   LOOP_VINFO_REDUCTIONS (res).create (10);
 913   LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
 914   LOOP_VINFO_SLP_INSTANCES (res).create (10);
 915   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 916   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
 917   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 918   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 919
 920   return res;
 921 }
 922
 923
 924 /* Function destroy_loop_vec_info.
 925
 926    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 927    stmts in the loop.  */
 928
 929 void
 930 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 931 {
 932   struct loop *loop;
 933   basic_block *bbs;
 934   int nbbs;
 935   gimple_stmt_iterator si;
 936   int j;
 937   vec<slp_instance> slp_instances;
 938   slp_instance instance;
 939   bool swapped;
 940
 941   if (!loop_vinfo)
 942     return;
 943
 944   loop = LOOP_VINFO_LOOP (loop_vinfo);
 945
 946   bbs = LOOP_VINFO_BBS (loop_vinfo);
 947   nbbs = clean_stmts ? loop->num_nodes : 0;
 948   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
 949
 950   for (j = 0; j < nbbs; j++)
 951     {
 952       basic_block bb = bbs[j];
 953       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 954         free_stmt_vec_info (gsi_stmt (si));
 955
 956       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 957         {
 958           gimple stmt = gsi_stmt (si);
 959
 960           /* We may have broken canonical form by moving a constant
 961              into RHS1 of a commutative op.  Fix such occurrences.  */
 962           if (swapped && is_gimple_assign (stmt))
 963             {
 964               enum tree_code code = gimple_assign_rhs_code (stmt);
 965
 966               if ((code == PLUS_EXPR
 967                    || code == POINTER_PLUS_EXPR
 968                    || code == MULT_EXPR)
 969                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
 970                 swap_ssa_operands (stmt,
 971                                    gimple_assign_rhs1_ptr (stmt),
 972                                    gimple_assign_rhs2_ptr (stmt));
 973             }
 974
 975           /* Free stmt_vec_info.  */
 976           free_stmt_vec_info (stmt);
 977           gsi_next (&si);
 978         }
 979     }
 980
 981   free (LOOP_VINFO_BBS (loop_vinfo));
 982   vect_destroy_datarefs (loop_vinfo, NULL);
 983   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 984   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
 985   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
 986   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
 987   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 988   FOR_EACH_VEC_ELT (slp_instances, j, instance)
 989     vect_free_slp_instance (instance);
 990
 991   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
 992   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
 993   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
 994   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
 995
 996   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
 997     LOOP_VINFO_PEELING_HTAB (loop_vinfo).dispose ();
 998
 999   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1000
1001   free (loop_vinfo);
1002   loop->aux = NULL;
1003 }
1004
1005
1006 /* Function vect_analyze_loop_1.
1007
1008    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1009    for it. The different analyses will record information in the
1010    loop_vec_info struct.  This is a subset of the analyses applied in
1011    vect_analyze_loop, to be applied on an inner-loop nested in the loop
1012    that is now considered for (outer-loop) vectorization.  */
1013
1014 static loop_vec_info
1015 vect_analyze_loop_1 (struct loop *loop)
1016 {
1017   loop_vec_info loop_vinfo;
1018
1019   if (dump_enabled_p ())
1020     dump_printf_loc (MSG_NOTE, vect_location,
1021                      "===== analyze_loop_nest_1 =====\n");
1022
1023   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1024
1025   loop_vinfo = vect_analyze_loop_form (loop);
1026   if (!loop_vinfo)
1027     {
1028       if (dump_enabled_p ())
1029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1030                          "bad inner-loop form.\n");
1031       return NULL;
1032     }
1033
1034   return loop_vinfo;
1035 }
1036
1037
1038 /* Function vect_analyze_loop_form.
1039
1040    Verify that certain CFG restrictions hold, including:
1041    - the loop has a pre-header
1042    - the loop has a single entry and exit
1043    - the loop exit condition is simple enough, and the number of iterations
1044      can be analyzed (a countable loop).  */
1045
1046 loop_vec_info
1047 vect_analyze_loop_form (struct loop *loop)
1048 {
1049   loop_vec_info loop_vinfo;
1050   gimple loop_cond;
1051   tree number_of_iterations = NULL;
1052   loop_vec_info inner_loop_vinfo = NULL;
1053
1054   if (dump_enabled_p ())
1055     dump_printf_loc (MSG_NOTE, vect_location,
1056                      "=== vect_analyze_loop_form ===\n");
1057
1058   /* Different restrictions apply when we are considering an inner-most loop,
1059      vs. an outer (nested) loop.
1060      (FORNOW. May want to relax some of these restrictions in the future).  */
1061
1062   if (!loop->inner)
1063     {
1064       /* Inner-most loop.  We currently require that the number of BBs is
1065          exactly 2 (the header and latch).  Vectorizable inner-most loops
1066          look like this:
1067
1068                         (pre-header)
1069                            |
1070                           header <--------+
1071                            | |            |
1072                            | +--> latch --+
1073                            |
1074                         (exit-bb)  */
1075
1076       if (loop->num_nodes != 2)
1077         {
1078           if (dump_enabled_p ())
1079             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1080                              "not vectorized: control flow in loop.\n");
1081           return NULL;
1082         }
1083
1084       if (empty_block_p (loop->header))
1085     {
1086           if (dump_enabled_p ())
1087             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1088                              "not vectorized: empty loop.\n");
1089       return NULL;
1090     }
1091     }
1092   else
1093     {
1094       struct loop *innerloop = loop->inner;
1095       edge entryedge;
1096
1097       /* Nested loop. We currently require that the loop is doubly-nested,
1098          contains a single inner loop, and the number of BBs is exactly 5.
1099          Vectorizable outer-loops look like this:
1100
1101                         (pre-header)
1102                            |
1103                           header <---+
1104                            |         |
1105                           inner-loop |
1106                            |         |
1107                           tail ------+
1108                            |
1109                         (exit-bb)
1110
1111          The inner-loop has the properties expected of inner-most loops
1112          as described above.  */
1113
1114       if ((loop->inner)->inner || (loop->inner)->next)
1115         {
1116           if (dump_enabled_p ())
1117             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1118                              "not vectorized: multiple nested loops.\n");
1119           return NULL;
1120         }
1121
1122       /* Analyze the inner-loop.  */
1123       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1124       if (!inner_loop_vinfo)
1125         {
1126           if (dump_enabled_p ())
1127             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1128                              "not vectorized: Bad inner loop.\n");
1129           return NULL;
1130         }
1131
1132       if (!expr_invariant_in_loop_p (loop,
1133                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1134         {
1135           if (dump_enabled_p ())
1136             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137                              "not vectorized: inner-loop count not"
1138                              " invariant.\n");
1139           destroy_loop_vec_info (inner_loop_vinfo, true);
1140           return NULL;
1141         }
1142
1143       if (loop->num_nodes != 5)
1144         {
1145           if (dump_enabled_p ())
1146             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1147                              "not vectorized: control flow in loop.\n");
1148           destroy_loop_vec_info (inner_loop_vinfo, true);
1149           return NULL;
1150         }
1151
1152       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1153       entryedge = EDGE_PRED (innerloop->header, 0);
1154       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1155         entryedge = EDGE_PRED (innerloop->header, 1);
1156
1157       if (entryedge->src != loop->header
1158           || !single_exit (innerloop)
1159           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1160         {
1161           if (dump_enabled_p ())
1162             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1163                              "not vectorized: unsupported outerloop form.\n");
1164           destroy_loop_vec_info (inner_loop_vinfo, true);
1165           return NULL;
1166         }
1167
1168       if (dump_enabled_p ())
1169         dump_printf_loc (MSG_NOTE, vect_location,
1170                          "Considering outer-loop vectorization.\n");
1171     }
1172
1173   if (!single_exit (loop)
1174       || EDGE_COUNT (loop->header->preds) != 2)
1175     {
1176       if (dump_enabled_p ())
1177         {
1178           if (!single_exit (loop))
1179             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1180                              "not vectorized: multiple exits.\n");
1181           else if (EDGE_COUNT (loop->header->preds) != 2)
1182             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1183                              "not vectorized: too many incoming edges.\n");
1184         }
1185       if (inner_loop_vinfo)
1186         destroy_loop_vec_info (inner_loop_vinfo, true);
1187       return NULL;
1188     }
1189
1190   /* We assume that the loop exit condition is at the end of the loop. i.e,
1191      that the loop is represented as a do-while (with a proper if-guard
1192      before the loop if needed), where the loop header contains all the
1193      executable statements, and the latch is empty.  */
1194   if (!empty_block_p (loop->latch)
1195       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1196     {
1197       if (dump_enabled_p ())
1198         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1199                          "not vectorized: latch block not empty.\n");
1200       if (inner_loop_vinfo)
1201         destroy_loop_vec_info (inner_loop_vinfo, true);
1202       return NULL;
1203     }
1204
1205   /* Make sure there exists a single-predecessor exit bb:  */
1206   if (!single_pred_p (single_exit (loop)->dest))
1207     {
1208       edge e = single_exit (loop);
1209       if (!(e->flags & EDGE_ABNORMAL))
1210         {
1211           split_loop_exit_edge (e);
1212           if (dump_enabled_p ())
1213             dump_printf (MSG_NOTE, "split exit edge.\n");
1214         }
1215       else
1216         {
1217           if (dump_enabled_p ())
1218             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1219                              "not vectorized: abnormal loop exit edge.\n");
1220           if (inner_loop_vinfo)
1221             destroy_loop_vec_info (inner_loop_vinfo, true);
1222           return NULL;
1223         }
1224     }
1225
1226   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1227   if (!loop_cond)
1228     {
1229       if (dump_enabled_p ())
1230         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1231                          "not vectorized: complicated exit condition.\n");
1232       if (inner_loop_vinfo)
1233         destroy_loop_vec_info (inner_loop_vinfo, true);
1234       return NULL;
1235     }
1236
1237   if (!number_of_iterations)
1238     {
1239       if (dump_enabled_p ())
1240         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1241                          "not vectorized: number of iterations cannot be "
1242                          "computed.\n");
1243       if (inner_loop_vinfo)
1244         destroy_loop_vec_info (inner_loop_vinfo, true);
1245       return NULL;
1246     }
1247
1248   if (chrec_contains_undetermined (number_of_iterations))
1249     {
1250       if (dump_enabled_p ())
1251             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1252                              "Infinite number of iterations.\n");
1253       if (inner_loop_vinfo)
1254         destroy_loop_vec_info (inner_loop_vinfo, true);
1255       return NULL;
1256     }
1257
1258   if (!NITERS_KNOWN_P (number_of_iterations))
1259     {
1260       if (dump_enabled_p ())
1261         {
1262           dump_printf_loc (MSG_NOTE, vect_location,
1263                            "Symbolic number of iterations is ");
1264           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1265           dump_printf (MSG_NOTE, "\n");
1266         }
1267     }
1268   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1269     {
1270       if (dump_enabled_p ())
1271         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1272                          "not vectorized: number of iterations = 0.\n");
1273       if (inner_loop_vinfo)
1274         destroy_loop_vec_info (inner_loop_vinfo, true);
1275       return NULL;
1276     }
1277
1278   loop_vinfo = new_loop_vec_info (loop);
1279   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1280   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1281
1282   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1283
1284   /* CHECKME: May want to keep it around it in the future.  */
1285   if (inner_loop_vinfo)
1286     destroy_loop_vec_info (inner_loop_vinfo, false);
1287
1288   gcc_assert (!loop->aux);
1289   loop->aux = loop_vinfo;
1290   return loop_vinfo;
1291 }
1292
1293
1294 /* Function vect_analyze_loop_operations.
1295
1296    Scan the loop stmts and make sure they are all vectorizable.  */
1297
1298 static bool
1299 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1300 {
1301   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1302   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1303   int nbbs = loop->num_nodes;
1304   gimple_stmt_iterator si;
1305   unsigned int vectorization_factor = 0;
1306   int i;
1307   gimple phi;
1308   stmt_vec_info stmt_info;
1309   bool need_to_vectorize = false;
1310   int min_profitable_iters;
1311   int min_scalar_loop_bound;
1312   unsigned int th;
1313   bool only_slp_in_loop = true, ok;
1314   HOST_WIDE_INT max_niter;
1315   HOST_WIDE_INT estimated_niter;
1316   int min_profitable_estimate;
1317
1318   if (dump_enabled_p ())
1319     dump_printf_loc (MSG_NOTE, vect_location,
1320                      "=== vect_analyze_loop_operations ===\n");
1321
1322   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1323   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1324   if (slp)
1325     {
1326       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1327          vectorization factor of the loop is the unrolling factor required by
1328          the SLP instances.  If that unrolling factor is 1, we say, that we
1329          perform pure SLP on loop - cross iteration parallelism is not
1330          exploited.  */
1331       for (i = 0; i < nbbs; i++)
1332         {
1333           basic_block bb = bbs[i];
1334           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1335             {
1336               gimple stmt = gsi_stmt (si);
1337               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1338               gcc_assert (stmt_info);
1339               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1340                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1341                   && !PURE_SLP_STMT (stmt_info))
1342                 /* STMT needs both SLP and loop-based vectorization.  */
1343                 only_slp_in_loop = false;
1344             }
1345         }
1346
1347       if (only_slp_in_loop)
1348         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1349       else
1350         vectorization_factor = least_common_multiple (vectorization_factor,
1351                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1352
1353       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1354       if (dump_enabled_p ())
1355         dump_printf_loc (MSG_NOTE, vect_location,
1356                          "Updating vectorization factor to %d\n",
1357                          vectorization_factor);
1358     }
1359
1360   for (i = 0; i < nbbs; i++)
1361     {
1362       basic_block bb = bbs[i];
1363
1364       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1365         {
1366           phi = gsi_stmt (si);
1367           ok = true;
1368
1369           stmt_info = vinfo_for_stmt (phi);
1370           if (dump_enabled_p ())
1371             {
1372               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1373               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1374               dump_printf (MSG_NOTE, "\n");
1375             }
1376
1377           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1378              (i.e., a phi in the tail of the outer-loop).  */
1379           if (! is_loop_header_bb_p (bb))
1380             {
1381               /* FORNOW: we currently don't support the case that these phis
1382                  are not used in the outerloop (unless it is double reduction,
1383                  i.e., this phi is vect_reduction_def), cause this case
1384                  requires to actually do something here.  */
1385               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1386                    || STMT_VINFO_LIVE_P (stmt_info))
1387                   && STMT_VINFO_DEF_TYPE (stmt_info)
1388                      != vect_double_reduction_def)
1389                 {
1390                   if (dump_enabled_p ())
1391                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                                      "Unsupported loop-closed phi in "
1393                                      "outer-loop.\n");
1394                   return false;
1395                 }
1396
1397               /* If PHI is used in the outer loop, we check that its operand
1398                  is defined in the inner loop.  */
1399               if (STMT_VINFO_RELEVANT_P (stmt_info))
1400                 {
1401                   tree phi_op;
1402                   gimple op_def_stmt;
1403
1404                   if (gimple_phi_num_args (phi) != 1)
1405                     return false;
1406
1407                   phi_op = PHI_ARG_DEF (phi, 0);
1408                   if (TREE_CODE (phi_op) != SSA_NAME)
1409                     return false;
1410
1411                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1412                   if (gimple_nop_p (op_def_stmt)
1413                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1414                       || !vinfo_for_stmt (op_def_stmt))
1415                     return false;
1416
1417                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1418                         != vect_used_in_outer
1419                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1420                            != vect_used_in_outer_by_reduction)
1421                     return false;
1422                 }
1423
1424               continue;
1425             }
1426
1427           gcc_assert (stmt_info);
1428
1429           if (STMT_VINFO_LIVE_P (stmt_info))
1430             {
1431               /* FORNOW: not yet supported.  */
1432               if (dump_enabled_p ())
1433                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1434                                  "not vectorized: value used after loop.\n");
1435               return false;
1436             }
1437
1438           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1439               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1440             {
1441               /* A scalar-dependence cycle that we don't support.  */
1442               if (dump_enabled_p ())
1443                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1444                                  "not vectorized: scalar dependence cycle.\n");
1445               return false;
1446             }
1447
1448           if (STMT_VINFO_RELEVANT_P (stmt_info))
1449             {
1450               need_to_vectorize = true;
1451               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1452                 ok = vectorizable_induction (phi, NULL, NULL);
1453             }
1454
1455           if (!ok)
1456             {
1457               if (dump_enabled_p ())
1458                 {
1459                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1460                                    "not vectorized: relevant phi not "
1461                                    "supported: ");
1462                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1463                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1464                 }
1465               return false;
1466             }
1467         }
1468
1469       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1470         {
1471           gimple stmt = gsi_stmt (si);
1472           if (!gimple_clobber_p (stmt)
1473               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1474             return false;
1475         }
1476     } /* bbs */
1477
1478   /* All operations in the loop are either irrelevant (deal with loop
1479      control, or dead), or only used outside the loop and can be moved
1480      out of the loop (e.g. invariants, inductions).  The loop can be
1481      optimized away by scalar optimizations.  We're better off not
1482      touching this loop.  */
1483   if (!need_to_vectorize)
1484     {
1485       if (dump_enabled_p ())
1486         dump_printf_loc (MSG_NOTE, vect_location,
1487                          "All the computation can be taken out of the loop.\n");
1488       if (dump_enabled_p ())
1489         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1490                          "not vectorized: redundant loop. no profit to "
1491                          "vectorize.\n");
1492       return false;
1493     }
1494
1495   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1496     dump_printf_loc (MSG_NOTE, vect_location,
1497                      "vectorization_factor = %d, niters = "
1498                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1499                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1500
1501   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1502        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1503       || ((max_niter = max_stmt_executions_int (loop)) != -1
1504           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1505     {
1506       if (dump_enabled_p ())
1507         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1508                          "not vectorized: iteration count too small.\n");
1509       if (dump_enabled_p ())
1510         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1511                          "not vectorized: iteration count smaller than "
1512                          "vectorization factor.\n");
1513       return false;
1514     }
1515
1516   /* Analyze cost.  Decide if worth while to vectorize.  */
1517
1518   /* Once VF is set, SLP costs should be updated since the number of created
1519      vector stmts depends on VF.  */
1520   vect_update_slp_costs_according_to_vf (loop_vinfo);
1521
1522   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1523                                       &min_profitable_estimate);
1524   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1525
1526   if (min_profitable_iters < 0)
1527     {
1528       if (dump_enabled_p ())
1529         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1530                          "not vectorized: vectorization not profitable.\n");
1531       if (dump_enabled_p ())
1532         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533                          "not vectorized: vector version will never be "
1534                          "profitable.\n");
1535       return false;
1536     }
1537
1538   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1539                             * vectorization_factor) - 1);
1540
1541
1542   /* Use the cost model only if it is more conservative than user specified
1543      threshold.  */
1544
1545   th = (unsigned) min_scalar_loop_bound;
1546   if (min_profitable_iters
1547       && (!min_scalar_loop_bound
1548           || min_profitable_iters > min_scalar_loop_bound))
1549     th = (unsigned) min_profitable_iters;
1550
1551   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1552       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1553     {
1554       if (dump_enabled_p ())
1555         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1556                          "not vectorized: vectorization not profitable.\n");
1557       if (dump_enabled_p ())
1558         dump_printf_loc (MSG_NOTE, vect_location,
1559                          "not vectorized: iteration count smaller than user "
1560                          "specified loop bound parameter or minimum profitable "
1561                          "iterations (whichever is more conservative).\n");
1562       return false;
1563     }
1564
1565   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1566       && ((unsigned HOST_WIDE_INT) estimated_niter
1567           <= MAX (th, (unsigned)min_profitable_estimate)))
1568     {
1569       if (dump_enabled_p ())
1570         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1571                          "not vectorized: estimated iteration count too "
1572                          "small.\n");
1573       if (dump_enabled_p ())
1574         dump_printf_loc (MSG_NOTE, vect_location,
1575                          "not vectorized: estimated iteration count smaller "
1576                          "than specified loop bound parameter or minimum "
1577                          "profitable iterations (whichever is more "
1578                          "conservative).\n");
1579       return false;
1580     }
1581
1582   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1583       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1584       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1585     {
1586       if (dump_enabled_p ())
1587         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
1588       if (!vect_can_advance_ivs_p (loop_vinfo))
1589         {
1590           if (dump_enabled_p ())
1591             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1592                              "not vectorized: can't create epilog loop 1.\n");
1593           return false;
1594         }
1595       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1596         {
1597           if (dump_enabled_p ())
1598             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599                              "not vectorized: can't create epilog loop 2.\n");
1600           return false;
1601         }
1602     }
1603
1604   return true;
1605 }
1606
1607
1608 /* Function vect_analyze_loop_2.
1609
1610    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1611    for it.  The different analyses will record information in the
1612    loop_vec_info struct.  */
1613 static bool
1614 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1615 {
1616   bool ok, slp = false;
1617   int max_vf = MAX_VECTORIZATION_FACTOR;
1618   int min_vf = 2;
1619
1620   /* Find all data references in the loop (which correspond to vdefs/vuses)
1621      and analyze their evolution in the loop.  Also adjust the minimal
1622      vectorization factor according to the loads and stores.
1623
1624      FORNOW: Handle only simple, array references, which
1625      alignment can be forced, and aligned pointer-references.  */
1626
1627   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1628   if (!ok)
1629     {
1630       if (dump_enabled_p ())
1631         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1632                          "bad data references.\n");
1633       return false;
1634     }
1635
1636   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1637      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1638
1639   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1640   if (!ok)
1641     {
1642       if (dump_enabled_p ())
1643         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1644                          "bad data access.\n");
1645       return false;
1646     }
1647
1648   /* Classify all cross-iteration scalar data-flow cycles.
1649      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1650
1651   vect_analyze_scalar_cycles (loop_vinfo);
1652
1653   vect_pattern_recog (loop_vinfo, NULL);
1654
1655   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1656
1657   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1658   if (!ok)
1659     {
1660       if (dump_enabled_p ())
1661         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1662                          "unexpected pattern.\n");
1663       return false;
1664     }
1665
1666   /* Analyze data dependences between the data-refs in the loop
1667      and adjust the maximum vectorization factor according to
1668      the dependences.
1669      FORNOW: fail at the first data dependence that we encounter.  */
1670
1671   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1672   if (!ok
1673       || max_vf < min_vf)
1674     {
1675       if (dump_enabled_p ())
1676             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1677                              "bad data dependence.\n");
1678       return false;
1679     }
1680
1681   ok = vect_determine_vectorization_factor (loop_vinfo);
1682   if (!ok)
1683     {
1684       if (dump_enabled_p ())
1685         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686                          "can't determine vectorization factor.\n");
1687       return false;
1688     }
1689   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1690     {
1691       if (dump_enabled_p ())
1692         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1693                          "bad data dependence.\n");
1694       return false;
1695     }
1696
1697   /* Analyze the alignment of the data-refs in the loop.
1698      Fail if a data reference is found that cannot be vectorized.  */
1699
1700   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1701   if (!ok)
1702     {
1703       if (dump_enabled_p ())
1704         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1705                          "bad data alignment.\n");
1706       return false;
1707     }
1708
1709   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1710      It is important to call pruning after vect_analyze_data_ref_accesses,
1711      since we use grouping information gathered by interleaving analysis.  */
1712   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1713   if (!ok)
1714     {
1715       if (dump_enabled_p ())
1716         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1717                          "too long list of versioning for alias "
1718                          "run-time tests.\n");
1719       return false;
1720     }
1721
1722   /* This pass will decide on using loop versioning and/or loop peeling in
1723      order to enhance the alignment of data references in the loop.  */
1724
1725   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1726   if (!ok)
1727     {
1728       if (dump_enabled_p ())
1729         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1730                          "bad data alignment.\n");
1731       return false;
1732     }
1733
1734   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1735   ok = vect_analyze_slp (loop_vinfo, NULL);
1736   if (ok)
1737     {
1738       /* Decide which possible SLP instances to SLP.  */
1739       slp = vect_make_slp_decision (loop_vinfo);
1740
1741       /* Find stmts that need to be both vectorized and SLPed.  */
1742       vect_detect_hybrid_slp (loop_vinfo);
1743     }
1744   else
1745     return false;
1746
1747   /* Scan all the operations in the loop and make sure they are
1748      vectorizable.  */
1749
1750   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1751   if (!ok)
1752     {
1753       if (dump_enabled_p ())
1754         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1755                          "bad operation or unsupported loop bound.\n");
1756       return false;
1757     }
1758
1759   return true;
1760 }
1761
1762 /* Function vect_analyze_loop.
1763
1764    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1765    for it.  The different analyses will record information in the
1766    loop_vec_info struct.  */
1767 loop_vec_info
1768 vect_analyze_loop (struct loop *loop)
1769 {
1770   loop_vec_info loop_vinfo;
1771   unsigned int vector_sizes;
1772
1773   /* Autodetect first vector size we try.  */
1774   current_vector_size = 0;
1775   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1776
1777   if (dump_enabled_p ())
1778     dump_printf_loc (MSG_NOTE, vect_location,
1779                      "===== analyze_loop_nest =====\n");
1780
1781   if (loop_outer (loop)
1782       && loop_vec_info_for_loop (loop_outer (loop))
1783       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1784     {
1785       if (dump_enabled_p ())
1786         dump_printf_loc (MSG_NOTE, vect_location,
1787                          "outer-loop already vectorized.\n");
1788       return NULL;
1789     }
1790
1791   while (1)
1792     {
1793       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1794       loop_vinfo = vect_analyze_loop_form (loop);
1795       if (!loop_vinfo)
1796         {
1797           if (dump_enabled_p ())
1798             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1799                              "bad loop form.\n");
1800           return NULL;
1801         }
1802
1803       if (vect_analyze_loop_2 (loop_vinfo))
1804         {
1805           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1806
1807           return loop_vinfo;
1808         }
1809
1810       destroy_loop_vec_info (loop_vinfo, true);
1811
1812       vector_sizes &= ~current_vector_size;
1813       if (vector_sizes == 0
1814           || current_vector_size == 0)
1815         return NULL;
1816
1817       /* Try the next biggest vector size.  */
1818       current_vector_size = 1 << floor_log2 (vector_sizes);
1819       if (dump_enabled_p ())
1820         dump_printf_loc (MSG_NOTE, vect_location,
1821                          "***** Re-trying analysis with "
1822                          "vector size %d\n", current_vector_size);
1823     }
1824 }
1825
1826
1827 /* Function reduction_code_for_scalar_code
1828
1829    Input:
1830    CODE - tree_code of a reduction operations.
1831
1832    Output:
1833    REDUC_CODE - the corresponding tree-code to be used to reduce the
1834       vector of partial results into a single scalar result (which
1835       will also reside in a vector) or ERROR_MARK if the operation is
1836       a supported reduction operation, but does not have such tree-code.
1837
1838    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1839
1840 static bool
1841 reduction_code_for_scalar_code (enum tree_code code,
1842                                 enum tree_code *reduc_code)
1843 {
1844   switch (code)
1845     {
1846       case MAX_EXPR:
1847         *reduc_code = REDUC_MAX_EXPR;
1848         return true;
1849
1850       case MIN_EXPR:
1851         *reduc_code = REDUC_MIN_EXPR;
1852         return true;
1853
1854       case PLUS_EXPR:
1855         *reduc_code = REDUC_PLUS_EXPR;
1856         return true;
1857
1858       case MULT_EXPR:
1859       case MINUS_EXPR:
1860       case BIT_IOR_EXPR:
1861       case BIT_XOR_EXPR:
1862       case BIT_AND_EXPR:
1863         *reduc_code = ERROR_MARK;
1864         return true;
1865
1866       default:
1867        return false;
1868     }
1869 }
1870
1871
1872 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1873    STMT is printed with a message MSG. */
1874
1875 static void
1876 report_vect_op (int msg_type, gimple stmt, const char *msg)
1877 {
1878   dump_printf_loc (msg_type, vect_location, "%s", msg);
1879   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1880   dump_printf (msg_type, "\n");
1881 }
1882
1883
1884 /* Detect SLP reduction of the form:
1885
1886    #a1 = phi <a5, a0>
1887    a2 = operation (a1)
1888    a3 = operation (a2)
1889    a4 = operation (a3)
1890    a5 = operation (a4)
1891
1892    #a = phi <a5>
1893
1894    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1895    FIRST_STMT is the first reduction stmt in the chain
1896    (a2 = operation (a1)).
1897
1898    Return TRUE if a reduction chain was detected.  */
1899
1900 static bool
1901 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1902 {
1903   struct loop *loop = (gimple_bb (phi))->loop_father;
1904   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1905   enum tree_code code;
1906   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1907   stmt_vec_info use_stmt_info, current_stmt_info;
1908   tree lhs;
1909   imm_use_iterator imm_iter;
1910   use_operand_p use_p;
1911   int nloop_uses, size = 0, n_out_of_loop_uses;
1912   bool found = false;
1913
1914   if (loop != vect_loop)
1915     return false;
1916
1917   lhs = PHI_RESULT (phi);
1918   code = gimple_assign_rhs_code (first_stmt);
1919   while (1)
1920     {
1921       nloop_uses = 0;
1922       n_out_of_loop_uses = 0;
1923       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1924         {
1925           gimple use_stmt = USE_STMT (use_p);
1926           if (is_gimple_debug (use_stmt))
1927             continue;
1928
1929           use_stmt = USE_STMT (use_p);
1930
1931           /* Check if we got back to the reduction phi.  */
1932           if (use_stmt == phi)
1933             {
1934               loop_use_stmt = use_stmt;
1935               found = true;
1936               break;
1937             }
1938
1939           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1940             {
1941               if (vinfo_for_stmt (use_stmt)
1942                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1943                 {
1944                   loop_use_stmt = use_stmt;
1945                   nloop_uses++;
1946                 }
1947             }
1948            else
1949              n_out_of_loop_uses++;
1950
1951            /* There are can be either a single use in the loop or two uses in
1952               phi nodes.  */
1953            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1954              return false;
1955         }
1956
1957       if (found)
1958         break;
1959
1960       /* We reached a statement with no loop uses.  */
1961       if (nloop_uses == 0)
1962         return false;
1963
1964       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1965       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1966         return false;
1967
1968       if (!is_gimple_assign (loop_use_stmt)
1969           || code != gimple_assign_rhs_code (loop_use_stmt)
1970           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1971         return false;
1972
1973       /* Insert USE_STMT into reduction chain.  */
1974       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1975       if (current_stmt)
1976         {
1977           current_stmt_info = vinfo_for_stmt (current_stmt);
1978           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1979           GROUP_FIRST_ELEMENT (use_stmt_info)
1980             = GROUP_FIRST_ELEMENT (current_stmt_info);
1981         }
1982       else
1983         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1984
1985       lhs = gimple_assign_lhs (loop_use_stmt);
1986       current_stmt = loop_use_stmt;
1987       size++;
1988    }
1989
1990   if (!found || loop_use_stmt != phi || size < 2)
1991     return false;
1992
1993   /* Swap the operands, if needed, to make the reduction operand be the second
1994      operand.  */
1995   lhs = PHI_RESULT (phi);
1996   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1997   while (next_stmt)
1998     {
1999       if (gimple_assign_rhs2 (next_stmt) == lhs)
2000         {
2001           tree op = gimple_assign_rhs1 (next_stmt);
2002           gimple def_stmt = NULL;
2003
2004           if (TREE_CODE (op) == SSA_NAME)
2005             def_stmt = SSA_NAME_DEF_STMT (op);
2006
2007           /* Check that the other def is either defined in the loop
2008              ("vect_internal_def"), or it's an induction (defined by a
2009              loop-header phi-node).  */
2010           if (def_stmt
2011               && gimple_bb (def_stmt)
2012               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2013               && (is_gimple_assign (def_stmt)
2014                   || is_gimple_call (def_stmt)
2015                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2016                            == vect_induction_def
2017                   || (gimple_code (def_stmt) == GIMPLE_PHI
2018                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2019                                   == vect_internal_def
2020                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2021             {
2022               lhs = gimple_assign_lhs (next_stmt);
2023               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2024               continue;
2025             }
2026
2027           return false;
2028         }
2029       else
2030         {
2031           tree op = gimple_assign_rhs2 (next_stmt);
2032           gimple def_stmt = NULL;
2033
2034           if (TREE_CODE (op) == SSA_NAME)
2035             def_stmt = SSA_NAME_DEF_STMT (op);
2036
2037           /* Check that the other def is either defined in the loop
2038             ("vect_internal_def"), or it's an induction (defined by a
2039             loop-header phi-node).  */
2040           if (def_stmt
2041               && gimple_bb (def_stmt)
2042               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2043               && (is_gimple_assign (def_stmt)
2044                   || is_gimple_call (def_stmt)
2045                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2046                               == vect_induction_def
2047                   || (gimple_code (def_stmt) == GIMPLE_PHI
2048                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2049                                   == vect_internal_def
2050                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2051             {
2052               if (dump_enabled_p ())
2053                 {
2054                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2055                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2056                   dump_printf (MSG_NOTE, "\n");
2057                 }
2058
2059               swap_ssa_operands (next_stmt,
2060                                  gimple_assign_rhs1_ptr (next_stmt),
2061                                  gimple_assign_rhs2_ptr (next_stmt));
2062               update_stmt (next_stmt);
2063
2064               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2065                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2066             }
2067           else
2068             return false;
2069         }
2070
2071       lhs = gimple_assign_lhs (next_stmt);
2072       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2073     }
2074
2075   /* Save the chain for further analysis in SLP detection.  */
2076   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2077   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2078   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2079
2080   return true;
2081 }
2082
2083
2084 /* Function vect_is_simple_reduction_1
2085
2086    (1) Detect a cross-iteration def-use cycle that represents a simple
2087    reduction computation.  We look for the following pattern:
2088
2089    loop_header:
2090      a1 = phi < a0, a2 >
2091      a3 = ...
2092      a2 = operation (a3, a1)
2093
2094    or
2095
2096    a3 = ...
2097    loop_header:
2098      a1 = phi < a0, a2 >
2099      a2 = operation (a3, a1)
2100
2101    such that:
2102    1. operation is commutative and associative and it is safe to
2103       change the order of the computation (if CHECK_REDUCTION is true)
2104    2. no uses for a2 in the loop (a2 is used out of the loop)
2105    3. no uses of a1 in the loop besides the reduction operation
2106    4. no uses of a1 outside the loop.
2107
2108    Conditions 1,4 are tested here.
2109    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2110
2111    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2112    nested cycles, if CHECK_REDUCTION is false.
2113
2114    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2115    reductions:
2116
2117      a1 = phi < a0, a2 >
2118      inner loop (def of a3)
2119      a2 = phi < a3 >
2120
2121    If MODIFY is true it tries also to rework the code in-place to enable
2122    detection of more reduction patterns.  For the time being we rewrite
   "res -= RHS" into "res += -RHS" when it seems worthwhile.
2124 */
2125
2126 static gimple
2127 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2128                             bool check_reduction, bool *double_reduc,
2129                             bool modify)
2130 {
2131   struct loop *loop = (gimple_bb (phi))->loop_father;
2132   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2133   edge latch_e = loop_latch_edge (loop);
2134   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2135   gimple def_stmt, def1 = NULL, def2 = NULL;
2136   enum tree_code orig_code, code;
2137   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2138   tree type;
2139   int nloop_uses;
2140   tree name;
2141   imm_use_iterator imm_iter;
2142   use_operand_p use_p;
2143   bool phi_def;
2144
2145   *double_reduc = false;
2146
2147   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2148      otherwise, we assume outer loop vectorization.  */
2149   gcc_assert ((check_reduction && loop == vect_loop)
2150               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2151
2152   name = PHI_RESULT (phi);
2153   nloop_uses = 0;
2154   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2155     {
2156       gimple use_stmt = USE_STMT (use_p);
2157       if (is_gimple_debug (use_stmt))
2158         continue;
2159
2160       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2161         {
2162           if (dump_enabled_p ())
2163             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2164                              "intermediate value used outside loop.\n");
2165
2166           return NULL;
2167         }
2168
2169       if (vinfo_for_stmt (use_stmt)
2170           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2171         nloop_uses++;
2172       if (nloop_uses > 1)
2173         {
2174           if (dump_enabled_p ())
2175             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2176                              "reduction used in loop.\n");
2177           return NULL;
2178         }
2179     }
2180
2181   if (TREE_CODE (loop_arg) != SSA_NAME)
2182     {
2183       if (dump_enabled_p ())
2184         {
2185           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186                            "reduction: not ssa_name: ");
2187           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2188           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2189         }
2190       return NULL;
2191     }
2192
2193   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2194   if (!def_stmt)
2195     {
2196       if (dump_enabled_p ())
2197         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2198                          "reduction: no def_stmt.\n");
2199       return NULL;
2200     }
2201
2202   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2203     {
2204       if (dump_enabled_p ())
2205         {
2206           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2207           dump_printf (MSG_NOTE, "\n");
2208         }
2209       return NULL;
2210     }
2211
2212   if (is_gimple_assign (def_stmt))
2213     {
2214       name = gimple_assign_lhs (def_stmt);
2215       phi_def = false;
2216     }
2217   else
2218     {
2219       name = PHI_RESULT (def_stmt);
2220       phi_def = true;
2221     }
2222
2223   nloop_uses = 0;
2224   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2225     {
2226       gimple use_stmt = USE_STMT (use_p);
2227       if (is_gimple_debug (use_stmt))
2228         continue;
2229       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2230           && vinfo_for_stmt (use_stmt)
2231           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2232         nloop_uses++;
2233       if (nloop_uses > 1)
2234         {
2235           if (dump_enabled_p ())
2236             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237                              "reduction used in loop.\n");
2238           return NULL;
2239         }
2240     }
2241
2242   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2243      defined in the inner loop.  */
2244   if (phi_def)
2245     {
2246       op1 = PHI_ARG_DEF (def_stmt, 0);
2247
2248       if (gimple_phi_num_args (def_stmt) != 1
2249           || TREE_CODE (op1) != SSA_NAME)
2250         {
2251           if (dump_enabled_p ())
2252             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2253                              "unsupported phi node definition.\n");
2254
2255           return NULL;
2256         }
2257
2258       def1 = SSA_NAME_DEF_STMT (op1);
2259       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2260           && loop->inner
2261           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2262           && is_gimple_assign (def1))
2263         {
2264           if (dump_enabled_p ())
2265             report_vect_op (MSG_NOTE, def_stmt,
2266                             "detected double reduction: ");
2267
2268           *double_reduc = true;
2269           return def_stmt;
2270         }
2271
2272       return NULL;
2273     }
2274
2275   code = orig_code = gimple_assign_rhs_code (def_stmt);
2276
2277   /* We can handle "res -= x[i]", which is non-associative by
2278      simply rewriting this into "res += -x[i]".  Avoid changing
2279      gimple instruction for the first simple tests and only do this
2280      if we're allowed to change code at all.  */
2281   if (code == MINUS_EXPR
2282       && modify
2283       && (op1 = gimple_assign_rhs1 (def_stmt))
2284       && TREE_CODE (op1) == SSA_NAME
2285       && SSA_NAME_DEF_STMT (op1) == phi)
2286     code = PLUS_EXPR;
2287
2288   if (check_reduction
2289       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2290     {
2291       if (dump_enabled_p ())
2292         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2293                         "reduction: not commutative/associative: ");
2294       return NULL;
2295     }
2296
2297   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2298     {
2299       if (code != COND_EXPR)
2300         {
2301           if (dump_enabled_p ())
2302             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2303                             "reduction: not binary operation: ");
2304
2305           return NULL;
2306         }
2307
2308       op3 = gimple_assign_rhs1 (def_stmt);
2309       if (COMPARISON_CLASS_P (op3))
2310         {
2311           op4 = TREE_OPERAND (op3, 1);
2312           op3 = TREE_OPERAND (op3, 0);
2313         }
2314
2315       op1 = gimple_assign_rhs2 (def_stmt);
2316       op2 = gimple_assign_rhs3 (def_stmt);
2317
2318       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2319         {
2320           if (dump_enabled_p ())
2321             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2322                             "reduction: uses not ssa_names: ");
2323
2324           return NULL;
2325         }
2326     }
2327   else
2328     {
2329       op1 = gimple_assign_rhs1 (def_stmt);
2330       op2 = gimple_assign_rhs2 (def_stmt);
2331
2332       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2333         {
2334           if (dump_enabled_p ())
2335             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2336                             "reduction: uses not ssa_names: ");
2337
2338           return NULL;
2339         }
2340    }
2341
2342   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2343   if ((TREE_CODE (op1) == SSA_NAME
2344        && !types_compatible_p (type,TREE_TYPE (op1)))
2345       || (TREE_CODE (op2) == SSA_NAME
2346           && !types_compatible_p (type, TREE_TYPE (op2)))
2347       || (op3 && TREE_CODE (op3) == SSA_NAME
2348           && !types_compatible_p (type, TREE_TYPE (op3)))
2349       || (op4 && TREE_CODE (op4) == SSA_NAME
2350           && !types_compatible_p (type, TREE_TYPE (op4))))
2351     {
2352       if (dump_enabled_p ())
2353         {
2354           dump_printf_loc (MSG_NOTE, vect_location,
2355                            "reduction: multiple types: operation type: ");
2356           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2357           dump_printf (MSG_NOTE, ", operands types: ");
2358           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2359                              TREE_TYPE (op1));
2360           dump_printf (MSG_NOTE, ",");
2361           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2362                              TREE_TYPE (op2));
2363           if (op3)
2364             {
2365               dump_printf (MSG_NOTE, ",");
2366               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2367                                  TREE_TYPE (op3));
2368             }
2369
2370           if (op4)
2371             {
2372               dump_printf (MSG_NOTE, ",");
2373               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2374                                  TREE_TYPE (op4));
2375             }
2376           dump_printf (MSG_NOTE, "\n");
2377         }
2378
2379       return NULL;
2380     }
2381
2382   /* Check that it's ok to change the order of the computation.
2383      Generally, when vectorizing a reduction we change the order of the
2384      computation.  This may change the behavior of the program in some
2385      cases, so we need to check that this is ok.  One exception is when
2386      vectorizing an outer-loop: the inner-loop is executed sequentially,
2387      and therefore vectorizing reductions in the inner-loop during
2388      outer-loop vectorization is safe.  */
2389
2390   /* CHECKME: check for !flag_finite_math_only too?  */
2391   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2392       && check_reduction)
2393     {
2394       /* Changing the order of operations changes the semantics.  */
2395       if (dump_enabled_p ())
2396         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2397                         "reduction: unsafe fp math optimization: ");
2398       return NULL;
2399     }
2400   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2401            && check_reduction)
2402     {
2403       /* Changing the order of operations changes the semantics.  */
2404       if (dump_enabled_p ())
2405         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2406                         "reduction: unsafe int math optimization: ");
2407       return NULL;
2408     }
2409   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2410     {
2411       /* Changing the order of operations changes the semantics.  */
2412       if (dump_enabled_p ())
2413         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2414                         "reduction: unsafe fixed-point math optimization: ");
2415       return NULL;
2416     }
2417
2418   /* If we detected "res -= x[i]" earlier, rewrite it into
2419      "res += -x[i]" now.  If this turns out to be useless reassoc
2420      will clean it up again.  */
2421   if (orig_code == MINUS_EXPR)
2422     {
2423       tree rhs = gimple_assign_rhs2 (def_stmt);
2424       tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2425       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2426                                                          rhs, NULL);
2427       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2428       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2429                                                           loop_info, NULL));
2430       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2431       gimple_assign_set_rhs2 (def_stmt, negrhs);
2432       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2433       update_stmt (def_stmt);
2434     }
2435
2436   /* Reduction is safe. We're dealing with one of the following:
2437      1) integer arithmetic and no trapv
2438      2) floating point arithmetic, and special flags permit this optimization
2439      3) nested cycle (i.e., outer loop vectorization).  */
2440   if (TREE_CODE (op1) == SSA_NAME)
2441     def1 = SSA_NAME_DEF_STMT (op1);
2442
2443   if (TREE_CODE (op2) == SSA_NAME)
2444     def2 = SSA_NAME_DEF_STMT (op2);
2445
2446   if (code != COND_EXPR
2447       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2448     {
2449       if (dump_enabled_p ())
2450         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2451       return NULL;
2452     }
2453
2454   /* Check that one def is the reduction def, defined by PHI,
2455      the other def is either defined in the loop ("vect_internal_def"),
2456      or it's an induction (defined by a loop-header phi-node).  */
2457
2458   if (def2 && def2 == phi
2459       && (code == COND_EXPR
2460           || !def1 || gimple_nop_p (def1)
2461           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2462           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2463               && (is_gimple_assign (def1)
2464                   || is_gimple_call (def1)
2465                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2466                       == vect_induction_def
2467                   || (gimple_code (def1) == GIMPLE_PHI
2468                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2469                           == vect_internal_def
2470                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2471     {
2472       if (dump_enabled_p ())
2473         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2474       return def_stmt;
2475     }
2476
2477   if (def1 && def1 == phi
2478       && (code == COND_EXPR
2479           || !def2 || gimple_nop_p (def2)
2480           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2481           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2482               && (is_gimple_assign (def2)
2483                   || is_gimple_call (def2)
2484                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2485                       == vect_induction_def
2486                   || (gimple_code (def2) == GIMPLE_PHI
2487                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2488                           == vect_internal_def
2489                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2490     {
2491       if (check_reduction)
2492         {
2493           /* Swap operands (just for simplicity - so that the rest of the code
2494              can assume that the reduction variable is always the last (second)
2495              argument).  */
2496           if (dump_enabled_p ())
2497             report_vect_op (MSG_NOTE, def_stmt,
2498                             "detected reduction: need to swap operands: ");
2499
2500           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2501                              gimple_assign_rhs2_ptr (def_stmt));
2502
2503           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2504             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2505         }
2506       else
2507         {
2508           if (dump_enabled_p ())
2509             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2510         }
2511
2512       return def_stmt;
2513     }
2514
2515   /* Try to find SLP reduction chain.  */
2516   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2517     {
2518       if (dump_enabled_p ())
2519         report_vect_op (MSG_NOTE, def_stmt,
2520                         "reduction: detected reduction chain: ");
2521
2522       return def_stmt;
2523     }
2524
2525   if (dump_enabled_p ())
2526     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2527                     "reduction: unknown pattern: ");
2528
2529   return NULL;
2530 }
2531
2532 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2533    in-place.  Arguments as there.  */
2534
2535 static gimple
2536 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2537                           bool check_reduction, bool *double_reduc)
2538 {
2539   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2540                                      double_reduc, false);
2541 }
2542
2543 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2544    in-place if it enables detection of more reductions.  Arguments
2545    as there.  */
2546
2547 gimple
2548 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2549                           bool check_reduction, bool *double_reduc)
2550 {
2551   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2552                                      double_reduc, true);
2553 }
2554
2555 /* Calculate the cost of one scalar iteration of the loop.  */
2556 int
2557 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2558 {
2559   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2560   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2561   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2562   int innerloop_iters, i, stmt_cost;
2563
2564   /* Count statements in scalar loop.  Using this as scalar cost for a single
2565      iteration for now.
2566
2567      TODO: Add outer loop support.
2568
2569      TODO: Consider assigning different costs to different scalar
2570      statements.  */
2571
2572   /* FORNOW.  */
2573   innerloop_iters = 1;
2574   if (loop->inner)
2575     innerloop_iters = 50; /* FIXME */
2576
2577   for (i = 0; i < nbbs; i++)
2578     {
2579       gimple_stmt_iterator si;
2580       basic_block bb = bbs[i];
2581
2582       if (bb->loop_father == loop->inner)
2583         factor = innerloop_iters;
2584       else
2585         factor = 1;
2586
2587       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2588         {
2589           gimple stmt = gsi_stmt (si);
2590           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2591
2592           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2593             continue;
2594
2595           /* Skip stmts that are not vectorized inside the loop.  */
2596           if (stmt_info
2597               && !STMT_VINFO_RELEVANT_P (stmt_info)
2598               && (!STMT_VINFO_LIVE_P (stmt_info)
2599                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2600               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2601             continue;
2602
2603           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2604             {
2605               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2606                stmt_cost = vect_get_stmt_cost (scalar_load);
2607              else
2608                stmt_cost = vect_get_stmt_cost (scalar_store);
2609             }
2610           else
2611             stmt_cost = vect_get_stmt_cost (scalar_stmt);
2612
2613           scalar_single_iter_cost += stmt_cost * factor;
2614         }
2615     }
2616   return scalar_single_iter_cost;
2617 }
2618
2619 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2620 int
2621 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2622                              int *peel_iters_epilogue,
2623                              int scalar_single_iter_cost,
2624                              stmt_vector_for_cost *prologue_cost_vec,
2625                              stmt_vector_for_cost *epilogue_cost_vec)
2626 {
2627   int retval = 0;
2628   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2629
2630   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2631     {
2632       *peel_iters_epilogue = vf/2;
2633       if (dump_enabled_p ())
2634         dump_printf_loc (MSG_NOTE, vect_location,
2635                          "cost model: epilogue peel iters set to vf/2 "
2636                          "because loop iterations are unknown .\n");
2637
2638       /* If peeled iterations are known but number of scalar loop
2639          iterations are unknown, count a taken branch per peeled loop.  */
2640       retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2641                                  NULL, 0, vect_prologue);
2642     }
2643   else
2644     {
2645       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2646       peel_iters_prologue = niters < peel_iters_prologue ?
2647                             niters : peel_iters_prologue;
2648       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2649       /* If we need to peel for gaps, but no peeling is required, we have to
2650          peel VF iterations.  */
2651       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2652         *peel_iters_epilogue = vf;
2653     }
2654
2655   if (peel_iters_prologue)
2656     retval += record_stmt_cost (prologue_cost_vec,
2657                                 peel_iters_prologue * scalar_single_iter_cost,
2658                                 scalar_stmt, NULL, 0, vect_prologue);
2659   if (*peel_iters_epilogue)
2660     retval += record_stmt_cost (epilogue_cost_vec,
2661                                 *peel_iters_epilogue * scalar_single_iter_cost,
2662                                 scalar_stmt, NULL, 0, vect_epilogue);
2663   return retval;
2664 }
2665
2666 /* Function vect_estimate_min_profitable_iters
2667
2668    Return the number of iterations required for the vector version of the
2669    loop to be profitable relative to the cost of the scalar version of the
2670    loop.  */
2671
2672 static void
2673 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2674                                     int *ret_min_profitable_niters,
2675                                     int *ret_min_profitable_estimate)
2676 {
2677   int min_profitable_iters;
2678   int min_profitable_estimate;
2679   int peel_iters_prologue;
2680   int peel_iters_epilogue;
2681   unsigned vec_inside_cost = 0;
2682   int vec_outside_cost = 0;
2683   unsigned vec_prologue_cost = 0;
2684   unsigned vec_epilogue_cost = 0;
2685   int scalar_single_iter_cost = 0;
2686   int scalar_outside_cost = 0;
2687   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2688   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2689   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2690
2691   /* Cost model disabled.  */
2692   if (unlimited_cost_model ())
2693     {
2694       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2695       *ret_min_profitable_niters = 0;
2696       *ret_min_profitable_estimate = 0;
2697       return;
2698     }
2699
2700   /* Requires loop versioning tests to handle misalignment.  */
2701   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2702     {
2703       /*  FIXME: Make cost depend on complexity of individual check.  */
2704       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2705       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2706                             vect_prologue);
2707       dump_printf (MSG_NOTE,
2708                    "cost model: Adding cost of checks for loop "
2709                    "versioning to treat misalignment.\n");
2710     }
2711
2712   /* Requires loop versioning with alias checks.  */
2713   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2714     {
2715       /*  FIXME: Make cost depend on complexity of individual check.  */
2716       unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2717       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2718                             vect_prologue);
2719       dump_printf (MSG_NOTE,
2720                    "cost model: Adding cost of checks for loop "
2721                    "versioning aliasing.\n");
2722     }
2723
2724   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2725       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2726     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2727                           vect_prologue);
2728
2729   /* Count statements in scalar loop.  Using this as scalar cost for a single
2730      iteration for now.
2731
2732      TODO: Add outer loop support.
2733
2734      TODO: Consider assigning different costs to different scalar
2735      statements.  */
2736
2737   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2738
2739   /* Add additional cost for the peeled instructions in prologue and epilogue
2740      loop.
2741
2742      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2743      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2744
2745      TODO: Build an expression that represents peel_iters for prologue and
2746      epilogue to be used in a run-time test.  */
2747
2748   if (npeel  < 0)
2749     {
2750       peel_iters_prologue = vf/2;
2751       dump_printf (MSG_NOTE, "cost model: "
2752                    "prologue peel iters set to vf/2.\n");
2753
2754       /* If peeling for alignment is unknown, loop bound of main loop becomes
2755          unknown.  */
2756       peel_iters_epilogue = vf/2;
2757       dump_printf (MSG_NOTE, "cost model: "
2758                    "epilogue peel iters set to vf/2 because "
2759                    "peeling for alignment is unknown.\n");
2760
2761       /* If peeled iterations are unknown, count a taken branch and a not taken
2762          branch per peeled loop. Even if scalar loop iterations are known,
2763          vector iterations are not known since peeled prologue iterations are
2764          not known. Hence guards remain the same.  */
2765       (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2766                             NULL, 0, vect_prologue);
2767       (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2768                             NULL, 0, vect_prologue);
2769       /* FORNOW: Don't attempt to pass individual scalar instructions to
2770          the model; just assume linear cost for scalar iterations.  */
2771       (void) add_stmt_cost (target_cost_data,
2772                             peel_iters_prologue * scalar_single_iter_cost,
2773                             scalar_stmt, NULL, 0, vect_prologue);
2774       (void) add_stmt_cost (target_cost_data,
2775                             peel_iters_epilogue * scalar_single_iter_cost,
2776                             scalar_stmt, NULL, 0, vect_epilogue);
2777     }
2778   else
2779     {
2780       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2781       stmt_info_for_cost *si;
2782       int j;
2783       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2784
2785       prologue_cost_vec.create (2);
2786       epilogue_cost_vec.create (2);
2787       peel_iters_prologue = npeel;
2788
2789       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2790                                           &peel_iters_epilogue,
2791                                           scalar_single_iter_cost,
2792                                           &prologue_cost_vec,
2793                                           &epilogue_cost_vec);
2794
2795       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2796         {
2797           struct _stmt_vec_info *stmt_info
2798             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2799           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2800                                 si->misalign, vect_prologue);
2801         }
2802
2803       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2804         {
2805           struct _stmt_vec_info *stmt_info
2806             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2807           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2808                                 si->misalign, vect_epilogue);
2809         }
2810
2811       prologue_cost_vec.release ();
2812       epilogue_cost_vec.release ();
2813     }
2814
2815   /* FORNOW: The scalar outside cost is incremented in one of the
2816      following ways:
2817
2818      1. The vectorizer checks for alignment and aliasing and generates
2819      a condition that allows dynamic vectorization.  A cost model
2820      check is ANDED with the versioning condition.  Hence scalar code
2821      path now has the added cost of the versioning check.
2822
2823        if (cost > th & versioning_check)
2824          jmp to vector code
2825
2826      Hence run-time scalar is incremented by not-taken branch cost.
2827
2828      2. The vectorizer then checks if a prologue is required.  If the
2829      cost model check was not done before during versioning, it has to
2830      be done before the prologue check.
2831
2832        if (cost <= th)
2833          prologue = scalar_iters
2834        if (prologue == 0)
2835          jmp to vector code
2836        else
2837          execute prologue
2838        if (prologue == num_iters)
2839          go to exit
2840
2841      Hence the run-time scalar cost is incremented by a taken branch,
2842      plus a not-taken branch, plus a taken branch cost.
2843
2844      3. The vectorizer then checks if an epilogue is required.  If the
2845      cost model check was not done before during prologue check, it
2846      has to be done with the epilogue check.
2847
2848        if (prologue == 0)
2849          jmp to vector code
2850        else
2851          execute prologue
2852        if (prologue == num_iters)
2853          go to exit
2854        vector code:
2855          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2856            jmp to epilogue
2857
2858      Hence the run-time scalar cost should be incremented by 2 taken
2859      branches.
2860
2861      TODO: The back end may reorder the BBS's differently and reverse
2862      conditions/branch directions.  Change the estimates below to
2863      something more reasonable.  */
2864
2865   /* If the number of iterations is known and we do not do versioning, we can
2866      decide whether to vectorize at compile time.  Hence the scalar version
2867      do not carry cost model guard costs.  */
2868   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2869       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2870       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2871     {
2872       /* Cost model check occurs at versioning.  */
2873       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2874           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2875         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2876       else
2877         {
2878           /* Cost model check occurs at prologue generation.  */
2879           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2880             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2881               + vect_get_stmt_cost (cond_branch_not_taken);
2882           /* Cost model check occurs at epilogue generation.  */
2883           else
2884             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2885         }
2886     }
2887
2888   /* Complete the target-specific cost calculations.  */
2889   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2890                &vec_inside_cost, &vec_epilogue_cost);
2891
2892   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2893
2894   /* Calculate number of iterations required to make the vector version
2895      profitable, relative to the loop bodies only.  The following condition
2896      must hold true:
2897      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2898      where
2899      SIC = scalar iteration cost, VIC = vector iteration cost,
2900      VOC = vector outside cost, VF = vectorization factor,
2901      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2902      SOC = scalar outside cost for run time cost model check.  */
2903
2904   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2905     {
2906       if (vec_outside_cost <= 0)
2907         min_profitable_iters = 1;
2908       else
2909         {
2910           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2911                                   - vec_inside_cost * peel_iters_prologue
2912                                   - vec_inside_cost * peel_iters_epilogue)
2913                                  / ((scalar_single_iter_cost * vf)
2914                                     - vec_inside_cost);
2915
2916           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2917               <= (((int) vec_inside_cost * min_profitable_iters)
2918                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2919             min_profitable_iters++;
2920         }
2921     }
2922   /* vector version will never be profitable.  */
2923   else
2924     {
2925       if (dump_enabled_p ())
2926         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2927                          "cost model: the vector iteration cost = %d "
2928                          "divided by the scalar iteration cost = %d "
2929                          "is greater or equal to the vectorization factor = %d"
2930                          ".\n",
2931                          vec_inside_cost, scalar_single_iter_cost, vf);
2932       *ret_min_profitable_niters = -1;
2933       *ret_min_profitable_estimate = -1;
2934       return;
2935     }
2936
2937   if (dump_enabled_p ())
2938     {
2939       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2940       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
2941                    vec_inside_cost);
2942       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
2943                    vec_prologue_cost);
2944       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
2945                    vec_epilogue_cost);
2946       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
2947                    scalar_single_iter_cost);
2948       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
2949                    scalar_outside_cost);
2950       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
2951                    vec_outside_cost);
2952       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
2953                    peel_iters_prologue);
2954       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
2955                    peel_iters_epilogue);
2956       dump_printf (MSG_NOTE,
2957                    "  Calculated minimum iters for profitability: %d\n",
2958                    min_profitable_iters);
2959       dump_printf (MSG_NOTE, "\n");
2960     }
2961
2962   min_profitable_iters =
2963         min_profitable_iters < vf ? vf : min_profitable_iters;
2964
2965   /* Because the condition we create is:
2966      if (niters <= min_profitable_iters)
2967        then skip the vectorized loop.  */
2968   min_profitable_iters--;
2969
2970   if (dump_enabled_p ())
2971     dump_printf_loc (MSG_NOTE, vect_location,
2972                      "  Runtime profitability threshold = %d\n",
2973                      min_profitable_iters);
2974
2975   *ret_min_profitable_niters = min_profitable_iters;
2976
2977   /* Calculate number of iterations required to make the vector version
2978      profitable, relative to the loop bodies only.
2979
2980      Non-vectorized variant is SIC * niters and it must win over vector
2981      variant on the expected loop trip count.  The following condition must hold true:
2982      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
2983
2984   if (vec_outside_cost <= 0)
2985     min_profitable_estimate = 1;
2986   else
2987     {
2988       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2989                                  - vec_inside_cost * peel_iters_prologue
2990                                  - vec_inside_cost * peel_iters_epilogue)
2991                                  / ((scalar_single_iter_cost * vf)
2992                                    - vec_inside_cost);
2993     }
2994   min_profitable_estimate --;
2995   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
2996   if (dump_enabled_p ())
2997     dump_printf_loc (MSG_NOTE, vect_location,
2998                      "  Static estimate profitability threshold = %d\n",
2999                       min_profitable_iters);
3000
3001   *ret_min_profitable_estimate = min_profitable_estimate;
3002 }
3003
3004
3005 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3006    functions. Design better to avoid maintenance issues.  */
3007
3008 /* Function vect_model_reduction_cost.
3009
3010    Models cost for a reduction operation, including the vector ops
3011    generated within the strip-mine loop, the initial definition before
3012    the loop, and the epilogue code that must be generated.  */
3013
3014 static bool
3015 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3016                            int ncopies)
3017 {
3018   int prologue_cost = 0, epilogue_cost = 0;
3019   enum tree_code code;
3020   optab optab;
3021   tree vectype;
3022   gimple stmt, orig_stmt;
3023   tree reduction_op;
3024   enum machine_mode mode;
3025   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3026   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3027   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3028
3029   /* Cost of reduction op inside loop.  */
3030   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3031                                         stmt_info, 0, vect_body);
3032   stmt = STMT_VINFO_STMT (stmt_info);
3033
3034   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3035     {
3036     case GIMPLE_SINGLE_RHS:
3037       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
3038       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
3039       break;
3040     case GIMPLE_UNARY_RHS:
3041       reduction_op = gimple_assign_rhs1 (stmt);
3042       break;
3043     case GIMPLE_BINARY_RHS:
3044       reduction_op = gimple_assign_rhs2 (stmt);
3045       break;
3046     case GIMPLE_TERNARY_RHS:
3047       reduction_op = gimple_assign_rhs3 (stmt);
3048       break;
3049     default:
3050       gcc_unreachable ();
3051     }
3052
3053   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3054   if (!vectype)
3055     {
3056       if (dump_enabled_p ())
3057         {
3058           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3059                            "unsupported data-type ");
3060           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3061                              TREE_TYPE (reduction_op));
3062           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3063         }
3064       return false;
3065    }
3066
3067   mode = TYPE_MODE (vectype);
3068   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3069
3070   if (!orig_stmt)
3071     orig_stmt = STMT_VINFO_STMT (stmt_info);
3072
3073   code = gimple_assign_rhs_code (orig_stmt);
3074
3075   /* Add in cost for initial definition.  */
3076   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3077                                   stmt_info, 0, vect_prologue);
3078
3079   /* Determine cost of epilogue code.
3080
3081      We have a reduction operator that will reduce the vector in one statement.
3082      Also requires scalar extract.  */
3083
3084   if (!nested_in_vect_loop_p (loop, orig_stmt))
3085     {
3086       if (reduc_code != ERROR_MARK)
3087         {
3088           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3089                                           stmt_info, 0, vect_epilogue);
3090           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3091                                           stmt_info, 0, vect_epilogue);
3092         }
3093       else
3094         {
3095           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3096           tree bitsize =
3097             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3098           int element_bitsize = tree_low_cst (bitsize, 1);
3099           int nelements = vec_size_in_bits / element_bitsize;
3100
3101           optab = optab_for_tree_code (code, vectype, optab_default);
3102
3103           /* We have a whole vector shift available.  */
3104           if (VECTOR_MODE_P (mode)
3105               && optab_handler (optab, mode) != CODE_FOR_nothing
3106               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3107             {
3108               /* Final reduction via vector shifts and the reduction operator.
3109                  Also requires scalar extract.  */
3110               epilogue_cost += add_stmt_cost (target_cost_data,
3111                                               exact_log2 (nelements) * 2,
3112                                               vector_stmt, stmt_info, 0,
3113                                               vect_epilogue);
3114               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3115                                               vec_to_scalar, stmt_info, 0,
3116                                               vect_epilogue);
3117             }
3118           else
3119             /* Use extracts and reduction op for final reduction.  For N
3120                elements, we have N extracts and N-1 reduction ops.  */
3121             epilogue_cost += add_stmt_cost (target_cost_data,
3122                                             nelements + nelements - 1,
3123                                             vector_stmt, stmt_info, 0,
3124                                             vect_epilogue);
3125         }
3126     }
3127
3128   if (dump_enabled_p ())
3129     dump_printf (MSG_NOTE,
3130                  "vect_model_reduction_cost: inside_cost = %d, "
3131                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3132                  prologue_cost, epilogue_cost);
3133
3134   return true;
3135 }
3136
3137
3138 /* Function vect_model_induction_cost.
3139
3140    Record the body and prologue cost of vectorizing induction STMT_INFO.  */
3141
3142 static void
3143 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3144 {
3145   void *cost_data
3146     = LOOP_VINFO_TARGET_COST_DATA (STMT_VINFO_LOOP_VINFO (stmt_info));
3147
3148   /* Inside the vector loop: NCOPIES vector stmts.  */
3149   unsigned body_cost = add_stmt_cost (cost_data, ncopies, vector_stmt,
3150                                       stmt_info, 0, vect_body);
3151
3152   /* Before the loop: two scalar_to_vec, for vec_init and vec_step.  */
3153   unsigned setup_cost = add_stmt_cost (cost_data, 2, scalar_to_vec,
3154                                        stmt_info, 0, vect_prologue);
3155
3156   if (!dump_enabled_p ())
3157     return;
3158   dump_printf_loc (MSG_NOTE, vect_location,
3159                    "vect_model_induction_cost: inside_cost = %d, "
3160                    "prologue_cost = %d .\n", body_cost, setup_cost);
3161 }
3162
3163
3164 /* Function get_initial_def_for_induction
3165
3166    Input:
3167    IV_PHI - the phi node of the scalar induction variable; its initial
3168    value X and step S are taken from its scalar evolution.
3169
3170    Output:
3171    Return the result of a new vector phi (view-converted to the vector type
3172    of IV_PHI's result when the two differ).  When IV_PHI is in the loop being
3173    vectorized, the phi starts, for a vector of 4 units, at
3174    [X, X + S, X + 2*S, X + 3*S].  */
3175
3176 static tree
3177 get_initial_def_for_induction (gimple iv_phi)
3178 {
3179   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3180   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3181   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3182   tree vectype;
3183   int nunits;
3184   edge pe = loop_preheader_edge (loop);
3185   struct loop *iv_loop;
3186   basic_block new_bb;
3187   tree new_vec, vec_init, vec_step, t;
3188   tree access_fn;
3189   tree new_var;
3190   tree new_name;
3191   gimple init_stmt, induction_phi, new_stmt;
3192   tree induc_def, vec_def, vec_dest;
3193   tree init_expr, step_expr;
3194   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3195   int i;
3196   bool ok;
3197   int ncopies;
3198   tree expr;
3199   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3200   bool nested_in_vect_loop = false;
3201   gimple_seq stmts = NULL;
3202   imm_use_iterator imm_iter;
3203   use_operand_p use_p;
3204   gimple exit_phi;
3205   edge latch_e;
3206   tree loop_arg;
3207   gimple_stmt_iterator si;
3208   basic_block bb = gimple_bb (iv_phi);
3209   tree stepvectype;
3210   tree resvectype;
3211
3212   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3213   if (nested_in_vect_loop_p (loop, iv_phi))
3214     {
3215       nested_in_vect_loop = true;
3216       iv_loop = loop->inner;
3217     }
3218   else
3219     iv_loop = loop;
3220   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3221   /* LOOP_ARG is the value of the scalar iv on the latch (back) edge.  */
3222   latch_e = loop_latch_edge (iv_loop);
3223   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3224   /* Get the initial value and step from the iv's scalar evolution.  */
3225   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3226   gcc_assert (access_fn);
3227   STRIP_NOPS (access_fn);
3228   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3229                                     &init_expr, &step_expr);
3230   gcc_assert (ok);
3231   pe = loop_preheader_edge (iv_loop);
3232   /* VECTYPE follows INIT_EXPR's type, RESVECTYPE the phi result's type.  */
3233   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3234   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3235   gcc_assert (vectype);
3236   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3237   ncopies = vf / nunits;
3238
3239   gcc_assert (phi_info);
3240   gcc_assert (ncopies >= 1);
3241
3242   /* Find the first insertion point in the BB.  */
3243   si = gsi_after_labels (bb);
3244
3245   /* Create the vector that holds the initial_value of the induction.  */
3246   if (nested_in_vect_loop)
3247     {
3248       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3249          been created during vectorization of previous stmts.  We obtain it
3250          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3251       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3252                                            loop_preheader_edge (iv_loop));
3253       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3254       /* If the initial value is not of proper type, convert it.  */
3255       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3256         {
3257           new_stmt = gimple_build_assign_with_ops
3258               (VIEW_CONVERT_EXPR,
3259                vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3260                build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3261           vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3262           gimple_assign_set_lhs (new_stmt, vec_init);
3263           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3264                                                  new_stmt);
3265           gcc_assert (!new_bb);
3266           set_vinfo_for_stmt (new_stmt,
3267                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3268         }
3269     }
3270   else
3271     {
3272       vec<constructor_elt, va_gc> *v;
3273
3274       /* iv_loop is the loop to be vectorized. Create:
3275          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3276       new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3277                                        vect_scalar_var, "var_");
3278       new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3279                                                      init_expr),
3280                                        &stmts, false, new_var);
3281       if (stmts)
3282         {
3283           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3284           gcc_assert (!new_bb);
3285         }
3286       /* CONSTANT_P stays true only while every element is invariant.  */
3287       vec_alloc (v, nunits);
3288       bool constant_p = is_gimple_min_invariant (new_name);
3289       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3290       for (i = 1; i < nunits; i++)
3291         {
3292           /* Create: new_name_i = new_name + step_expr  */
3293           new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3294                                   new_name, step_expr);
3295           if (!is_gimple_min_invariant (new_name))
3296             {
3297               init_stmt = gimple_build_assign (new_var, new_name);
3298               new_name = make_ssa_name (new_var, init_stmt);
3299               gimple_assign_set_lhs (init_stmt, new_name);
3300               new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3301               gcc_assert (!new_bb);
3302               if (dump_enabled_p ())
3303                 {
3304                   dump_printf_loc (MSG_NOTE, vect_location,
3305                                    "created new init_stmt: ");
3306                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3307                   dump_printf (MSG_NOTE, "\n");
3308                 }
3309               constant_p = false;
3310             }
3311           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3312         }
3313       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3314       if (constant_p)
3315         new_vec = build_vector_from_ctor (vectype, v);
3316       else
3317         new_vec = build_constructor (vectype, v);
3318       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3319     }
3320
3321
3322   /* Create the vector that holds the step of the induction.  */
3323   if (nested_in_vect_loop)
3324     /* iv_loop is nested in the loop to be vectorized. Generate:
3325        vec_step = [S, S, S, S]  */
3326     new_name = step_expr;
3327   else
3328     {
3329       /* iv_loop is the loop to be vectorized. Generate:
3330           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3331       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3332         {
3333           expr = build_int_cst (integer_type_node, vf);
3334           expr = fold_convert (TREE_TYPE (step_expr), expr);
3335         }
3336       else
3337         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3338       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3339                               expr, step_expr);
3340       if (TREE_CODE (step_expr) == SSA_NAME)
3341         new_name = vect_init_vector (iv_phi, new_name,
3342                                      TREE_TYPE (step_expr), NULL);
3343     }
3344
3345   t = unshare_expr (new_name);
3346   gcc_assert (CONSTANT_CLASS_P (new_name)
3347               || TREE_CODE (new_name) == SSA_NAME);
3348   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3349   gcc_assert (stepvectype);
3350   new_vec = build_vector_from_val (stepvectype, t);
3351   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3352
3353
3354   /* Create the following def-use cycle:
3355      loop prolog:
3356          vec_init = ...
3357          vec_step = ...
3358      loop:
3359          vec_iv = PHI <vec_init, vec_loop>
3360          ...
3361          STMT
3362          ...
3363          vec_loop = vec_iv + vec_step;  */
3364
3365   /* Create the induction-phi that defines the induction-operand.  */
3366   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3367   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3368   set_vinfo_for_stmt (induction_phi,
3369                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3370   induc_def = PHI_RESULT (induction_phi);
3371
3372   /* Create the iv update inside the loop  */
3373   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3374                                            induc_def, vec_step);
3375   vec_def = make_ssa_name (vec_dest, new_stmt);
3376   gimple_assign_set_lhs (new_stmt, vec_def);
3377   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3378   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3379                                                    NULL));
3380
3381   /* Set the arguments of the phi node:  */
3382   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3383   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3384                UNKNOWN_LOCATION);
3385
3386
3387   /* In case that vectorization factor (VF) is bigger than the number
3388      of elements that we can fit in a vectype (nunits), we have to generate
3389      more than one vector stmt - i.e - we need to "unroll" the
3390      vector stmt by a factor VF/nunits.  For more details see documentation
3391      in vectorizable_operation.  */
3392
3393   if (ncopies > 1)
3394     {
3395       stmt_vec_info prev_stmt_vinfo;
3396       /* FORNOW. This restriction should be relaxed.  */
3397       gcc_assert (!nested_in_vect_loop);
3398
3399       /* Step between consecutive copies: [nunits*S, ..., nunits*S].  */
3400       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3401         {
3402           expr = build_int_cst (integer_type_node, nunits);
3403           expr = fold_convert (TREE_TYPE (step_expr), expr);
3404         }
3405       else
3406         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3407       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3408                               expr, step_expr);
3409       if (TREE_CODE (step_expr) == SSA_NAME)
3410         new_name = vect_init_vector (iv_phi, new_name,
3411                                      TREE_TYPE (step_expr), NULL);
3412       t = unshare_expr (new_name);
3413       gcc_assert (CONSTANT_CLASS_P (new_name)
3414                   || TREE_CODE (new_name) == SSA_NAME);
3415       new_vec = build_vector_from_val (stepvectype, t);
3416       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3417       /* Chain the copies via STMT_VINFO_RELATED_STMT, starting at the phi.  */
3418       vec_def = induc_def;
3419       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3420       for (i = 1; i < ncopies; i++)
3421         {
3422           /* vec_i = vec_prev + vec_step  */
3423           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3424                                                    vec_def, vec_step);
3425           vec_def = make_ssa_name (vec_dest, new_stmt);
3426           gimple_assign_set_lhs (new_stmt, vec_def);
3427
3428           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3429           if (!useless_type_conversion_p (resvectype, vectype))
3430             {
3431               new_stmt = gimple_build_assign_with_ops
3432                   (VIEW_CONVERT_EXPR,
3433                    vect_get_new_vect_var (resvectype, vect_simple_var,
3434                                           "vec_iv_"),
3435                    build1 (VIEW_CONVERT_EXPR, resvectype,
3436                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3437               gimple_assign_set_lhs (new_stmt,
3438                                      make_ssa_name
3439                                        (gimple_assign_lhs (new_stmt), new_stmt));
3440               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3441             }
3442           set_vinfo_for_stmt (new_stmt,
3443                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3444           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3445           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3446         }
3447     }
3448
3449   if (nested_in_vect_loop)
3450     {
3451       /* Find the loop-closed exit-phi of the induction, and record
3452          the final vector of induction results:  */
3453       exit_phi = NULL;
3454       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3455         {
3456           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3457             {
3458               exit_phi = USE_STMT (use_p);
3459               break;
3460             }
3461         }
3462       if (exit_phi)
3463         {
3464           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3465           /* FORNOW. Currently not supporting the case that an inner-loop induction
3466              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3467           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3468                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3469
3470           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3471           if (dump_enabled_p ())
3472             {
3473               dump_printf_loc (MSG_NOTE, vect_location,
3474                                "vector of inductions after inner-loop:");
3475               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3476               dump_printf (MSG_NOTE, "\n");
3477             }
3478         }
3479     }
3480
3481
3482   if (dump_enabled_p ())
3483     {
3484       dump_printf_loc (MSG_NOTE, vect_location,
3485                        "transform induction: created def-use cycle: ");
3486       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3487       dump_printf (MSG_NOTE, "\n");
3488       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3489                         SSA_NAME_DEF_STMT (vec_def), 0);
3490       dump_printf (MSG_NOTE, "\n");
3491     }
3492   /* The vector phi is recorded as the vectorized form of IV_PHI.  */
3493   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3494   if (!useless_type_conversion_p (resvectype, vectype))
3495     {
3496       new_stmt = gimple_build_assign_with_ops
3497          (VIEW_CONVERT_EXPR,
3498           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3499           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3500       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3501       gimple_assign_set_lhs (new_stmt, induc_def);
3502       si = gsi_after_labels (bb);
3503       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3504       set_vinfo_for_stmt (new_stmt,
3505                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3506       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3507         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3508     }
3509
3510   return induc_def;
3511 }
3512
3513
3514 /* Function get_initial_def_for_reduction
3515
3516    Input:
3517    STMT - a stmt that performs a reduction operation in the loop.
3518    INIT_VAL - the initial value of the reduction variable
3519
3520    Output:
3521    ADJUSTMENT_DEF - may be NULL.  Otherwise set to the value the caller must
3522         apply to the final result in the epilog, or to NULL if none is needed.
3523    Return a vector variable, initialized according to the operation that STMT
3524         performs. This vector will be used as the initial value of the
3525         vector of partial results.
3526
3527    Option1 (adjust in epilog): Initialize the vector as follows:
3528      add/sub/widen_sum/dot_prod/bit or/xor: [0,0,...,0,0]
3529      mult: [1,1,...,1,1]        bit and: [-1,-1,...,-1,-1]
3530      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3531    and when necessary (all but min/max/cond_expr) let the caller know
3532    that it needs to adjust the result by init_val.
3533
3534    Option2: Initialize the vector as follows:
3535      add/sub/widen_sum/dot_prod/bit or/xor: [init_val,0,0,...,0]
3536      mult: [init_val,1,1,...,1]  bit and: [init_val,-1,-1,...,-1]
3537      min/max/cond_expr: [init_val,init_val,...,init_val]
3538    and no adjustments are needed.
3539
3540    For example, for the following code:
3541
3542    s = init_val;
3543    for (i=0;i<n;i++)
3544      s = s + a[i];
3545
3546    STMT is 's = s + a[i]', and the reduction variable is 's'.
3547    For a vector of 4 units, we want to return either [init_val,0,0,0],
3548    or [0,0,0,0] and let the caller know that it needs to adjust
3549    the result at the end by 'init_val'.
3550
3551    FORNOW, Option1 ('adjust in epilog') is used if ADJUSTMENT_DEF is not
3552    NULL, because this way the initialization vector is simpler (same
3553    element in all entries), and Option2 otherwise.
3554
3555    A cost model should help decide between these two schemes.  */
3556
3557 tree
3558 get_initial_def_for_reduction (gimple stmt, tree init_val,
3559                                tree *adjustment_def)
3560 {
3561   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3562   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3563   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3564   tree scalar_type = TREE_TYPE (init_val);
3565   tree vectype = get_vectype_for_scalar_type (scalar_type);
3566   int nunits;
3567   enum tree_code code = gimple_assign_rhs_code (stmt);
3568   tree def_for_init;
3569   tree init_def;
3570   tree *elts;
3571   int i;
3572   bool nested_in_vect_loop = false;
3573   tree init_value;
3574   REAL_VALUE_TYPE real_init_val = dconst0;
3575   int int_init_val = 0;
3576   gimple def_stmt = NULL;
3577
3578   gcc_assert (vectype);
3579   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3580
3581   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3582               || SCALAR_FLOAT_TYPE_P (scalar_type));
3583
3584   if (nested_in_vect_loop_p (loop, stmt))
3585     nested_in_vect_loop = true;
3586   else
3587     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3588
3589   /* In case of double reduction we only create a vector variable to be put
3590      in the reduction phi node.  The actual statement creation is done in
3591      vect_create_epilog_for_reduction.  */
3592   if (adjustment_def && nested_in_vect_loop
3593       && TREE_CODE (init_val) == SSA_NAME
3594       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3595       && gimple_code (def_stmt) == GIMPLE_PHI
3596       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3597       && vinfo_for_stmt (def_stmt)
3598       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3599           == vect_double_reduction_def)
3600     {
3601       *adjustment_def = NULL;
3602       return vect_create_destination_var (init_val, vectype);
3603     }
3604   /* INIT_VALUE is only consumed by the min/max/cond_expr case below.  */
3605   if (TREE_CONSTANT (init_val))
3606     {
3607       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3608         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3609       else
3610         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3611     }
3612   else
3613     init_value = init_val;
3614
3615   switch (code)
3616     {
3617       case WIDEN_SUM_EXPR:
3618       case DOT_PROD_EXPR:
3619       case PLUS_EXPR:
3620       case MINUS_EXPR:
3621       case BIT_IOR_EXPR:
3622       case BIT_XOR_EXPR:
3623       case MULT_EXPR:
3624       case BIT_AND_EXPR:
3625         /* ADJUSTMENT_DEF is NULL when called from
3626            vect_create_epilog_for_reduction to vectorize double reduction.  */
3627         if (adjustment_def)
3628           {
3629             if (nested_in_vect_loop)
3630               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3631                                                               NULL);
3632             else
3633               *adjustment_def = init_val;
3634           }
3635
3636         if (code == MULT_EXPR)
3637           {
3638             real_init_val = dconst1;
3639             int_init_val = 1;
3640           }
3641
3642         if (code == BIT_AND_EXPR)
3643           int_init_val = -1;
3644
3645         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3646           def_for_init = build_real (scalar_type, real_init_val);
3647         else
3648           def_for_init = build_int_cst (scalar_type, int_init_val);
3649
3650         /* Fill every element but the first with DEF_FOR_INIT (0, 1 or -1).  */
3651         elts = XALLOCAVEC (tree, nunits);
3652         for (i = nunits - 2; i >= 0; --i)
3653           elts[i + 1] = def_for_init;
3654
3655         /* Option1: the first element is DEF_FOR_INIT as well.  */
3656         if (adjustment_def)
3657           {
3658             elts[0] = def_for_init;
3659             init_def = build_vector (vectype, elts);
3660             break;
3661           }
3662
3663         /* Option2: the first element is INIT_VAL.  */
3664         elts[0] = init_val;
3665         if (TREE_CONSTANT (init_val))
3666           init_def = build_vector (vectype, elts);
3667         else
3668           {
3669             vec<constructor_elt, va_gc> *v;
3670             vec_alloc (v, nunits);
3671             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3672             for (i = 1; i < nunits; ++i)
3673               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3674             init_def = build_constructor (vectype, v);
3675           }
3676
3677         break;
3678
3679       case MIN_EXPR:
3680       case MAX_EXPR:
3681       case COND_EXPR:
3682         if (adjustment_def)
3683           {
3684             *adjustment_def = NULL_TREE;
3685             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3686             break;
3687           }
3688
3689         init_def = build_vector_from_val (vectype, init_value);
3690         break;
3691
3692       default:
3693         gcc_unreachable ();
3694     }
3695
3696   return init_def;
3697 }
3698
3699
3700 /* Function vect_create_epilog_for_reduction
3701
3702    Create code at the loop-epilog to finalize the result of a reduction
3703    computation.
3704
3705    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of
3706      vector reduction statements.
3707    STMT is the scalar reduction stmt that is being vectorized.
3708    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3709      number of elements that we can fit in a vectype (nunits).  In this case
3710      we have to generate more than one vector stmt - i.e - we need to "unroll"
3711      the vector stmt by a factor VF/nunits.  For more details see documentation
3712      in vectorizable_operation.
3713    REDUC_CODE is the tree-code for the epilog reduction.
3714    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3715      computation.
3716    REDUC_INDEX is the index of the operand in the right hand side of the
3717      statement that is defined by REDUCTION_PHI.
3718    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3719    SLP_NODE is an SLP node containing a group of reduction statements. The
3720      first one in this group is STMT.
3721
3722    This function:
3723    1. Creates the reduction def-use cycles: sets the arguments for
3724       REDUCTION_PHIS:
3725       The loop-entry argument is the vectorized initial-value of the reduction.
3726       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3727       sums.
3728    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3729       by applying the operation specified by REDUC_CODE if available, or by
3730       other means (whole-vector shifts or a scalar loop).
3731       The function also creates a new phi node at the loop exit to preserve
3732       loop-closed form, as illustrated below.
3733
3734      The flow at the entry to this function:
3735
3736         loop:
3737           vec_def = phi <null, null>            # REDUCTION_PHI
3738           VECT_DEF = vector_stmt                # vectorized form of STMT
3739           s_loop = scalar_stmt                  # (scalar) STMT
3740         loop_exit:
3741           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3742           use <s_out0>
3743           use <s_out0>
3744
3745      The above is transformed by this function into:
3746
3747         loop:
3748           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3749           VECT_DEF = vector_stmt                # vectorized form of STMT
3750           s_loop = scalar_stmt                  # (scalar) STMT
3751         loop_exit:
3752           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3753           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3754           v_out2 = reduce <v_out1>
3755           s_out3 = extract_field <v_out2, 0>
3756           s_out4 = adjust_result <s_out3>
3757           use <s_out4>
3758           use <s_out4>
3759 */
3760
3761 static void
3762 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3763                                   int ncopies, enum tree_code reduc_code,
3764                                   vec<gimple> reduction_phis,
3765                                   int reduc_index, bool double_reduc,
3766                                   slp_tree slp_node)
3767 {
3768   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3769   stmt_vec_info prev_phi_info;
3770   tree vectype;
3771   enum machine_mode mode;
3772   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3773   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3774   basic_block exit_bb;
3775   tree scalar_dest;
3776   tree scalar_type;
3777   gimple new_phi = NULL, phi;
3778   gimple_stmt_iterator exit_gsi;
3779   tree vec_dest;
3780   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3781   gimple epilog_stmt = NULL;
3782   enum tree_code code = gimple_assign_rhs_code (stmt);
3783   gimple exit_phi;
3784   tree bitsize, bitpos;
3785   tree adjustment_def = NULL;
3786   tree vec_initial_def = NULL;
3787   tree reduction_op, expr, def;
3788   tree orig_name, scalar_result;
3789   imm_use_iterator imm_iter, phi_imm_iter;
3790   use_operand_p use_p, phi_use_p;
3791   bool extract_scalar_result = false;
3792   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3793   bool nested_in_vect_loop = false;
3794   vec<gimple> new_phis = vNULL;
3795   vec<gimple> inner_phis = vNULL;
3796   enum vect_def_type dt = vect_unknown_def_type;
3797   int j, i;
3798   vec<tree> scalar_results = vNULL;
3799   unsigned int group_size = 1, k, ratio;
3800   vec<tree> vec_initial_defs = vNULL;
3801   vec<gimple> phis;
3802   bool slp_reduc = false;
3803   tree new_phi_result;
3804   gimple inner_phi = NULL;
3805
3806   if (slp_node)
3807     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3808
3809   if (nested_in_vect_loop_p (loop, stmt))
3810     {
3811       outer_loop = loop;
3812       loop = loop->inner;
3813       nested_in_vect_loop = true;
3814       gcc_assert (!slp_node);
3815     }
3816
3817   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3818     {
3819     case GIMPLE_SINGLE_RHS:
3820       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3821                   == ternary_op);
3822       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3823       break;
3824     case GIMPLE_UNARY_RHS:
3825       reduction_op = gimple_assign_rhs1 (stmt);
3826       break;
3827     case GIMPLE_BINARY_RHS:
3828       reduction_op = reduc_index ?
3829                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3830       break;
3831     case GIMPLE_TERNARY_RHS:
3832       reduction_op = gimple_op (stmt, reduc_index + 1);
3833       break;
3834     default:
3835       gcc_unreachable ();
3836     }
3837
3838   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3839   gcc_assert (vectype);
3840   mode = TYPE_MODE (vectype);
3841
3842   /* 1. Create the reduction def-use cycle:
3843      Set the arguments of REDUCTION_PHIS, i.e., transform
3844
3845         loop:
3846           vec_def = phi <null, null>            # REDUCTION_PHI
3847           VECT_DEF = vector_stmt                # vectorized form of STMT
3848           ...
3849
3850      into:
3851
3852         loop:
3853           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3854           VECT_DEF = vector_stmt                # vectorized form of STMT
3855           ...
3856
3857      (in case of SLP, do it for all the phis). */
3858
3859   /* Get the loop-entry arguments.  */
3860   if (slp_node)
3861     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3862                        NULL, slp_node, reduc_index);
3863   else
3864     {
3865       vec_initial_defs.create (1);
3866      /* For the case of reduction, vect_get_vec_def_for_operand returns
3867         the scalar def before the loop, that defines the initial value
3868         of the reduction variable.  */
3869       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3870                                                       &adjustment_def);
3871       vec_initial_defs.quick_push (vec_initial_def);
3872     }
3873
3874   /* Set phi nodes arguments.  */
3875   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3876     {
3877       tree vec_init_def = vec_initial_defs[i];
3878       tree def = vect_defs[i];
3879       for (j = 0; j < ncopies; j++)
3880         {
3881           /* Set the loop-entry arg of the reduction-phi.  */
3882           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3883                        UNKNOWN_LOCATION);
3884
3885           /* Set the loop-latch arg for the reduction-phi.  */
3886           if (j > 0)
3887             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3888
3889           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3890
3891           if (dump_enabled_p ())
3892             {
3893               dump_printf_loc (MSG_NOTE, vect_location,
3894                                "transform reduction: created def-use cycle: ");
3895               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3896               dump_printf (MSG_NOTE, "\n");
3897               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3898               dump_printf (MSG_NOTE, "\n");
3899             }
3900
3901           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3902         }
3903     }
3904
3905   vec_initial_defs.release ();
3906
3907   /* 2. Create epilog code.
3908         The reduction epilog code operates across the elements of the vector
3909         of partial results computed by the vectorized loop.
3910         The reduction epilog code consists of:
3911
3912         step 1: compute the scalar result in a vector (v_out2)
3913         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3914         step 3: adjust the scalar result (s_out3) if needed.
3915
3916         Step 1 can be accomplished using one of the following three schemes:
3917           (scheme 1) using reduc_code, if available.
3918           (scheme 2) using whole-vector shifts, if available.
3919           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3920                      combined.
3921
3922           The overall epilog code looks like this:
3923
3924           s_out0 = phi <s_loop>         # original EXIT_PHI
3925           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3926           v_out2 = reduce <v_out1>              # step 1
3927           s_out3 = extract_field <v_out2, 0>    # step 2
3928           s_out4 = adjust_result <s_out3>       # step 3
3929
3930           (step 3 is optional, and steps 1 and 2 may be combined).
3931           Lastly, the uses of s_out0 are replaced by s_out4.  */
3932
3933
3934   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3935          v_out1 = phi <VECT_DEF>
3936          Store them in NEW_PHIS.  */
3937
3938   exit_bb = single_exit (loop)->dest;
3939   prev_phi_info = NULL;
3940   new_phis.create (vect_defs.length ());
3941   FOR_EACH_VEC_ELT (vect_defs, i, def)
3942     {
3943       for (j = 0; j < ncopies; j++)
3944         {
3945           tree new_def = copy_ssa_name (def, NULL);
3946           phi = create_phi_node (new_def, exit_bb);
3947           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3948           if (j == 0)
3949             new_phis.quick_push (phi);
3950           else
3951             {
3952               def = vect_get_vec_def_for_stmt_copy (dt, def);
3953               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3954             }
3955
3956           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3957           prev_phi_info = vinfo_for_stmt (phi);
3958         }
3959     }
3960
3961   /* The epilogue is created for the outer-loop, i.e., for the loop being
3962      vectorized.  Create exit phis for the outer loop.  */
3963   if (double_reduc)
3964     {
3965       loop = outer_loop;
3966       exit_bb = single_exit (loop)->dest;
3967       inner_phis.create (vect_defs.length ());
3968       FOR_EACH_VEC_ELT (new_phis, i, phi)
3969         {
3970           tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3971           gimple outer_phi = create_phi_node (new_result, exit_bb);
3972           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3973                            PHI_RESULT (phi));
3974           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3975                                                             loop_vinfo, NULL));
3976           inner_phis.quick_push (phi);
3977           new_phis[i] = outer_phi;
3978           prev_phi_info = vinfo_for_stmt (outer_phi);
3979           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3980             {
3981               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3982               new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3983               outer_phi = create_phi_node (new_result, exit_bb);
3984               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3985                                PHI_RESULT (phi));
3986               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3987                                                         loop_vinfo, NULL));
3988               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3989               prev_phi_info = vinfo_for_stmt (outer_phi);
3990             }
3991         }
3992     }
3993
3994   exit_gsi = gsi_after_labels (exit_bb);
3995
3996   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3997          (i.e. when reduc_code is not available) and in the final adjustment
3998          code (if needed).  Also get the original scalar reduction variable as
3999          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4000          represents a reduction pattern), the tree-code and scalar-def are
4001          taken from the original stmt that the pattern-stmt (STMT) replaces.
4002          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4003          are taken from STMT.  */
4004
4005   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4006   if (!orig_stmt)
4007     {
4008       /* Regular reduction  */
4009       orig_stmt = stmt;
4010     }
4011   else
4012     {
4013       /* Reduction pattern  */
4014       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4015       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4016       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4017     }
4018
4019   code = gimple_assign_rhs_code (orig_stmt);
4020   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4021      partial results are added and not subtracted.  */
4022   if (code == MINUS_EXPR)
4023     code = PLUS_EXPR;
4024
4025   scalar_dest = gimple_assign_lhs (orig_stmt);
4026   scalar_type = TREE_TYPE (scalar_dest);
4027   scalar_results.create (group_size);
4028   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4029   bitsize = TYPE_SIZE (scalar_type);
4030
4031   /* In case this is a reduction in an inner-loop while vectorizing an outer
4032      loop - we don't need to extract a single scalar result at the end of the
4033      inner-loop (unless it is double reduction, i.e., the use of reduction is
4034      outside the outer-loop).  The final vector of partial results will be used
4035      in the vectorized outer-loop, or reduced to a scalar result at the end of
4036      the outer-loop.  */
4037   if (nested_in_vect_loop && !double_reduc)
4038     goto vect_finalize_reduction;
4039
4040   /* SLP reduction without reduction chain, e.g.,
4041      # a1 = phi <a2, a0>
4042      # b1 = phi <b2, b0>
4043      a2 = operation (a1)
4044      b2 = operation (b1)  */
4045   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4046
4047   /* In case of reduction chain, e.g.,
4048      # a1 = phi <a3, a0>
4049      a2 = operation (a1)
4050      a3 = operation (a2),
4051
4052      we may end up with more than one vector result.  Here we reduce them to
4053      one vector.  */
4054   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4055     {
4056       tree first_vect = PHI_RESULT (new_phis[0]);
4057       tree tmp;
4058       gimple new_vec_stmt = NULL;
4059
4060       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4061       for (k = 1; k < new_phis.length (); k++)
4062         {
4063           gimple next_phi = new_phis[k];
4064           tree second_vect = PHI_RESULT (next_phi);
4065
4066           tmp = build2 (code, vectype,  first_vect, second_vect);
4067           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4068           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4069           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4070           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4071         }
4072
4073       new_phi_result = first_vect;
4074       if (new_vec_stmt)
4075         {
4076           new_phis.truncate (0);
4077           new_phis.safe_push (new_vec_stmt);
4078         }
4079     }
4080   else
4081     new_phi_result = PHI_RESULT (new_phis[0]);
4082
4083   /* 2.3 Create the reduction code, using one of the three schemes described
4084          above. In SLP we simply need to extract all the elements from the
4085          vector (without reducing them), so we use scalar shifts.  */
4086   if (reduc_code != ERROR_MARK && !slp_reduc)
4087     {
4088       tree tmp;
4089
4090       /*** Case 1:  Create:
4091            v_out2 = reduc_expr <v_out1>  */
4092
4093       if (dump_enabled_p ())
4094         dump_printf_loc (MSG_NOTE, vect_location,
4095                          "Reduce using direct vector reduction.\n");
4096
4097       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4098       tmp = build1 (reduc_code, vectype, new_phi_result);
4099       epilog_stmt = gimple_build_assign (vec_dest, tmp);
4100       new_temp = make_ssa_name (vec_dest, epilog_stmt);
4101       gimple_assign_set_lhs (epilog_stmt, new_temp);
4102       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4103
4104       extract_scalar_result = true;
4105     }
4106   else
4107     {
4108       enum tree_code shift_code = ERROR_MARK;
4109       bool have_whole_vector_shift = true;
4110       int bit_offset;
4111       int element_bitsize = tree_low_cst (bitsize, 1);
4112       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4113       tree vec_temp;
4114
4115       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4116         shift_code = VEC_RSHIFT_EXPR;
4117       else
4118         have_whole_vector_shift = false;
4119
4120       /* Regardless of whether we have a whole vector shift, if we're
4121          emulating the operation via tree-vect-generic, we don't want
4122          to use it.  Only the first round of the reduction is likely
4123          to still be profitable via emulation.  */
4124       /* ??? It might be better to emit a reduction tree code here, so that
4125          tree-vect-generic can expand the first round via bit tricks.  */
4126       if (!VECTOR_MODE_P (mode))
4127         have_whole_vector_shift = false;
4128       else
4129         {
4130           optab optab = optab_for_tree_code (code, vectype, optab_default);
4131           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4132             have_whole_vector_shift = false;
4133         }
4134
4135       if (have_whole_vector_shift && !slp_reduc)
4136         {
4137           /*** Case 2: Create:
4138              for (offset = VS/2; offset >= element_size; offset/=2)
4139                 {
4140                   Create:  va' = vec_shift <va, offset>
4141                   Create:  va = vop <va, va'>
4142                 }  */
4143
4144           if (dump_enabled_p ())
4145             dump_printf_loc (MSG_NOTE, vect_location,
4146                              "Reduce using vector shifts\n");
4147
4148           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4149           new_temp = new_phi_result;
4150           for (bit_offset = vec_size_in_bits/2;
4151                bit_offset >= element_bitsize;
4152                bit_offset /= 2)
4153             {
4154               tree bitpos = size_int (bit_offset);
4155
4156               epilog_stmt = gimple_build_assign_with_ops (shift_code,
4157                                                vec_dest, new_temp, bitpos);
4158               new_name = make_ssa_name (vec_dest, epilog_stmt);
4159               gimple_assign_set_lhs (epilog_stmt, new_name);
4160               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4161
4162               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4163                                                           new_name, new_temp);
4164               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4165               gimple_assign_set_lhs (epilog_stmt, new_temp);
4166               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4167             }
4168
4169           extract_scalar_result = true;
4170         }
4171       else
4172         {
4173           tree rhs;
4174
4175           /*** Case 3: Create:
4176              s = extract_field <v_out2, 0>
4177              for (offset = element_size;
4178                   offset < vector_size;
4179                   offset += element_size;)
4180                {
4181                  Create:  s' = extract_field <v_out2, offset>
4182                  Create:  s = op <s, s'>  // For non SLP cases
4183                }  */
4184
4185           if (dump_enabled_p ())
4186             dump_printf_loc (MSG_NOTE, vect_location,
4187                              "Reduce using scalar code.\n");
4188
4189           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4190           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4191             {
4192               if (gimple_code (new_phi) == GIMPLE_PHI)
4193                 vec_temp = PHI_RESULT (new_phi);
4194               else
4195                 vec_temp = gimple_assign_lhs (new_phi);
4196               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4197                             bitsize_zero_node);
4198               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4199               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4200               gimple_assign_set_lhs (epilog_stmt, new_temp);
4201               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4202
4203               /* In SLP we don't need to apply reduction operation, so we just
4204                  collect s' values in SCALAR_RESULTS.  */
4205               if (slp_reduc)
4206                 scalar_results.safe_push (new_temp);
4207
4208               for (bit_offset = element_bitsize;
4209                    bit_offset < vec_size_in_bits;
4210                    bit_offset += element_bitsize)
4211                 {
4212                   tree bitpos = bitsize_int (bit_offset);
4213                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4214                                      bitsize, bitpos);
4215
4216                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4217                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4218                   gimple_assign_set_lhs (epilog_stmt, new_name);
4219                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4220
4221                   if (slp_reduc)
4222                     {
4223                       /* In SLP we don't need to apply reduction operation, so
4224                          we just collect s' values in SCALAR_RESULTS.  */
4225                       new_temp = new_name;
4226                       scalar_results.safe_push (new_name);
4227                     }
4228                   else
4229                     {
4230                       epilog_stmt = gimple_build_assign_with_ops (code,
4231                                           new_scalar_dest, new_name, new_temp);
4232                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4233                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4234                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4235                     }
4236                 }
4237             }
4238
4239           /* The only case where we need to reduce scalar results in SLP, is
4240              unrolling.  If the size of SCALAR_RESULTS is greater than
4241              GROUP_SIZE, we reduce them combining elements modulo
4242              GROUP_SIZE.  */
4243           if (slp_reduc)
4244             {
4245               tree res, first_res, new_res;
4246               gimple new_stmt;
4247
4248               /* Reduce multiple scalar results in case of SLP unrolling.  */
4249               for (j = group_size; scalar_results.iterate (j, &res);
4250                    j++)
4251                 {
4252                   first_res = scalar_results[j % group_size];
4253                   new_stmt = gimple_build_assign_with_ops (code,
4254                                               new_scalar_dest, first_res, res);
4255                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4256                   gimple_assign_set_lhs (new_stmt, new_res);
4257                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4258                   scalar_results[j % group_size] = new_res;
4259                 }
4260             }
4261           else
4262             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4263             scalar_results.safe_push (new_temp);
4264
4265           extract_scalar_result = false;
4266         }
4267     }
4268
4269   /* 2.4  Extract the final scalar result.  Create:
4270           s_out3 = extract_field <v_out2, bitpos>  */
4271
4272   if (extract_scalar_result)
4273     {
4274       tree rhs;
4275
4276       if (dump_enabled_p ())
4277         dump_printf_loc (MSG_NOTE, vect_location,
4278                          "extract scalar result\n");
4279
4280       if (BYTES_BIG_ENDIAN)
4281         bitpos = size_binop (MULT_EXPR,
4282                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4283                              TYPE_SIZE (scalar_type));
4284       else
4285         bitpos = bitsize_zero_node;
4286
4287       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4288       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4289       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4290       gimple_assign_set_lhs (epilog_stmt, new_temp);
4291       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4292       scalar_results.safe_push (new_temp);
4293     }
4294
4295 vect_finalize_reduction:
4296
4297   if (double_reduc)
4298     loop = loop->inner;
4299
4300   /* 2.5 Adjust the final result by the initial value of the reduction
4301          variable. (When such adjustment is not needed, then
4302          'adjustment_def' is zero).  For example, if code is PLUS we create:
4303          new_temp = loop_exit_def + adjustment_def  */
4304
4305   if (adjustment_def)
4306     {
4307       gcc_assert (!slp_reduc);
4308       if (nested_in_vect_loop)
4309         {
4310           new_phi = new_phis[0];
4311           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4312           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4313           new_dest = vect_create_destination_var (scalar_dest, vectype);
4314         }
4315       else
4316         {
4317           new_temp = scalar_results[0];
4318           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4319           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4320           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4321         }
4322
4323       epilog_stmt = gimple_build_assign (new_dest, expr);
4324       new_temp = make_ssa_name (new_dest, epilog_stmt);
4325       gimple_assign_set_lhs (epilog_stmt, new_temp);
4326       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4327       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4328       if (nested_in_vect_loop)
4329         {
4330           set_vinfo_for_stmt (epilog_stmt,
4331                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4332                                                  NULL));
4333           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4334                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4335
4336           if (!double_reduc)
4337             scalar_results.quick_push (new_temp);
4338           else
4339             scalar_results[0] = new_temp;
4340         }
4341       else
4342         scalar_results[0] = new_temp;
4343
4344       new_phis[0] = epilog_stmt;
4345     }
4346
4347   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4348           phis with new adjusted scalar results, i.e., replace use <s_out0>
4349           with use <s_out4>.
4350
4351      Transform:
4352         loop_exit:
4353           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4354           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4355           v_out2 = reduce <v_out1>
4356           s_out3 = extract_field <v_out2, 0>
4357           s_out4 = adjust_result <s_out3>
4358           use <s_out0>
4359           use <s_out0>
4360
4361      into:
4362
4363         loop_exit:
4364           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4365           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4366           v_out2 = reduce <v_out1>
4367           s_out3 = extract_field <v_out2, 0>
4368           s_out4 = adjust_result <s_out3>
4369           use <s_out4>
4370           use <s_out4> */
4371
4372
4373   /* In SLP reduction chain we reduce vector results into one vector if
4374      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4375      the last stmt in the reduction chain, since we are looking for the loop
4376      exit phi node.  */
4377   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4378     {
4379       scalar_dest = gimple_assign_lhs (
4380                         SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4381       group_size = 1;
4382     }
4383
4384   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4385      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4386      need to match SCALAR_RESULTS with corresponding statements.  The first
4387      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4388      the first vector stmt, etc.
4389      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4390   if (group_size > new_phis.length ())
4391     {
4392       ratio = group_size / new_phis.length ();
4393       gcc_assert (!(group_size % new_phis.length ()));
4394     }
4395   else
4396     ratio = 1;
4397
4398   for (k = 0; k < group_size; k++)
4399     {
4400       if (k % ratio == 0)
4401         {
4402           epilog_stmt = new_phis[k / ratio];
4403           reduction_phi = reduction_phis[k / ratio];
4404           if (double_reduc)
4405             inner_phi = inner_phis[k / ratio];
4406         }
4407
4408       if (slp_reduc)
4409         {
4410           gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4411
4412           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4413           /* SLP statements can't participate in patterns.  */
4414           gcc_assert (!orig_stmt);
4415           scalar_dest = gimple_assign_lhs (current_stmt);
4416         }
4417
4418       phis.create (3);
4419       /* Find the loop-closed-use at the loop exit of the original scalar
4420          result.  (The reduction result is expected to have two immediate uses -
4421          one at the latch block, and one at the loop exit).  */
4422       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4423         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4424             && !is_gimple_debug (USE_STMT (use_p)))
4425           phis.safe_push (USE_STMT (use_p));
4426
4427       /* While we expect to have found an exit_phi because of loop-closed-ssa
4428          form we can end up without one if the scalar cycle is dead.  */
4429
4430       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4431         {
4432           if (outer_loop)
4433             {
4434               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4435               gimple vect_phi;
4436
4437               /* FORNOW. Currently not supporting the case that an inner-loop
4438                  reduction is not used in the outer-loop (but only outside the
4439                  outer-loop), unless it is double reduction.  */
4440               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4441                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4442                           || double_reduc);
4443
4444               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4445               if (!double_reduc
4446                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4447                       != vect_double_reduction_def)
4448                 continue;
4449
4450               /* Handle double reduction:
4451
4452                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4453                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4454                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4455                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4456
4457                  At that point the regular reduction (stmt2 and stmt3) is
4458                  already vectorized, as well as the exit phi node, stmt4.
4459                  Here we vectorize the phi node of double reduction, stmt1, and
4460                  update all relevant statements.  */
4461
4462               /* Go through all the uses of s2 to find double reduction phi
4463                  node, i.e., stmt1 above.  */
4464               orig_name = PHI_RESULT (exit_phi);
4465               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4466                 {
4467                   stmt_vec_info use_stmt_vinfo;
4468                   stmt_vec_info new_phi_vinfo;
4469                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4470                   basic_block bb = gimple_bb (use_stmt);
4471                   gimple use;
4472
4473                   /* Check that USE_STMT is really double reduction phi
4474                      node.  */
4475                   if (gimple_code (use_stmt) != GIMPLE_PHI
4476                       || gimple_phi_num_args (use_stmt) != 2
4477                       || bb->loop_father != outer_loop)
4478                     continue;
4479                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4480                   if (!use_stmt_vinfo
4481                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4482                           != vect_double_reduction_def)
4483                     continue;
4484
4485                   /* Create vector phi node for double reduction:
4486                      vs1 = phi <vs0, vs2>
4487                      vs1 was created previously in this function by a call to
4488                        vect_get_vec_def_for_operand and is stored in
4489                        vec_initial_def;
4490                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4491                      vs0 is created here.  */
4492
4493                   /* Create vector phi node.  */
4494                   vect_phi = create_phi_node (vec_initial_def, bb);
4495                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4496                                     loop_vec_info_for_loop (outer_loop), NULL);
4497                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4498
4499                   /* Create vs0 - initial def of the double reduction phi.  */
4500                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4501                                              loop_preheader_edge (outer_loop));
4502                   init_def = get_initial_def_for_reduction (stmt,
4503                                                           preheader_arg, NULL);
4504                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4505                                                     vectype, NULL);
4506
4507                   /* Update phi node arguments with vs0 and vs2.  */
4508                   add_phi_arg (vect_phi, vect_phi_init,
4509                                loop_preheader_edge (outer_loop),
4510                                UNKNOWN_LOCATION);
4511                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4512                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4513                   if (dump_enabled_p ())
4514                     {
4515                       dump_printf_loc (MSG_NOTE, vect_location,
4516                                        "created double reduction phi node: ");
4517                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4518                       dump_printf (MSG_NOTE, "\n");
4519                     }
4520
4521                   vect_phi_res = PHI_RESULT (vect_phi);
4522
4523                   /* Replace the use, i.e., set the correct vs1 in the regular
4524                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4525                      loop is redundant.  */
4526                   use = reduction_phi;
4527                   for (j = 0; j < ncopies; j++)
4528                     {
4529                       edge pr_edge = loop_preheader_edge (loop);
4530                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4531                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4532                     }
4533                 }
4534             }
4535         }
4536
4537       phis.release ();
4538       if (nested_in_vect_loop)
4539         {
4540           if (double_reduc)
4541             loop = outer_loop;
4542           else
4543             continue;
4544         }
4545
4546       phis.create (3);
4547       /* Find the loop-closed-use at the loop exit of the original scalar
4548          result.  (The reduction result is expected to have two immediate uses,
4549          one at the latch block, and one at the loop exit).  For double
4550          reductions we are looking for exit phis of the outer loop.  */
4551       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4552         {
4553           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4554             {
4555               if (!is_gimple_debug (USE_STMT (use_p)))
4556                 phis.safe_push (USE_STMT (use_p));
4557             }
4558           else
4559             {
4560               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4561                 {
4562                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4563
4564                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4565                     {
4566                       if (!flow_bb_inside_loop_p (loop,
4567                                              gimple_bb (USE_STMT (phi_use_p)))
4568                           && !is_gimple_debug (USE_STMT (phi_use_p)))
4569                         phis.safe_push (USE_STMT (phi_use_p));
4570                     }
4571                 }
4572             }
4573         }
4574
4575       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4576         {
4577           /* Replace the uses:  */
4578           orig_name = PHI_RESULT (exit_phi);
4579           scalar_result = scalar_results[k];
4580           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4581             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4582               SET_USE (use_p, scalar_result);
4583         }
4584
4585       phis.release ();
4586     }
4587
4588   scalar_results.release ();
4589   inner_phis.release ();
4590   new_phis.release ();
4591 }
4592
4593
4594 /* Function vectorizable_reduction.
4595
4596    Check if STMT performs a reduction operation that can be vectorized.
4597    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4598    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4599    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4600
4601    This function also handles reduction idioms (patterns) that have been
4602    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4603    of this form:
4604      X = pattern_expr (arg0, arg1, ..., X)
4605    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4606    sequence that had been detected and replaced by the pattern-stmt (STMT).
4607
4608    In some cases of reduction patterns, the type of the reduction variable X is
4609    different than the type of the other arguments of STMT.
4610    In such cases, the vectype that is used when transforming STMT into a vector
4611    stmt is different than the vectype that is used to determine the
4612    vectorization factor, because it consists of a different number of elements
4613    than the actual number of elements that are being operated upon in parallel.
4614
4615    For example, consider an accumulation of shorts into an int accumulator.
4616    On some targets it's possible to vectorize this pattern operating on 8
4617    shorts at a time (hence, the vectype for purposes of determining the
4618    vectorization factor should be V8HI); on the other hand, the vectype that
4619    is used to create the vector form is actually V4SI (the type of the result).
4620
4621    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4622    indicates what is the actual level of parallelism (V8HI in the example), so
4623    that the right vectorization factor would be derived.  This vectype
4624    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4625    be used to create the vectorized stmt.  The right vectype for the vectorized
4626    stmt is obtained from the type of the result X:
4627         get_vectype_for_scalar_type (TREE_TYPE (X))
4628
4629    This means that, contrary to "regular" reductions (or "regular" stmts in
4630    general), the following equation:
4631       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4632    does *NOT* necessarily hold for reduction patterns.  */
4633
4634 bool
4635 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4636                         gimple *vec_stmt, slp_tree slp_node)
4637 {
4638   tree vec_dest;
4639   tree scalar_dest;
4640   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4641   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4642   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4643   tree vectype_in = NULL_TREE;
4644   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4645   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4646   enum tree_code code, orig_code, epilog_reduc_code;
4647   enum machine_mode vec_mode;
4648   int op_type;
4649   optab optab, reduc_optab;
4650   tree new_temp = NULL_TREE;
4651   tree def;
4652   gimple def_stmt;
4653   enum vect_def_type dt;
4654   gimple new_phi = NULL;
4655   tree scalar_type;
4656   bool is_simple_use;
4657   gimple orig_stmt;
4658   stmt_vec_info orig_stmt_info;
4659   tree expr = NULL_TREE;
4660   int i;
4661   int ncopies;
4662   int epilog_copies;
4663   stmt_vec_info prev_stmt_info, prev_phi_info;
4664   bool single_defuse_cycle = false;
4665   tree reduc_def = NULL_TREE;
4666   gimple new_stmt = NULL;
4667   int j;
4668   tree ops[3];
4669   bool nested_cycle = false, found_nested_cycle_def = false;
4670   gimple reduc_def_stmt = NULL;
4671   /* The default is that the reduction variable is the last in statement.  */
4672   int reduc_index = 2;
4673   bool double_reduc = false, dummy;
4674   basic_block def_bb;
4675   struct loop * def_stmt_loop, *outer_loop = NULL;
4676   tree def_arg;
4677   gimple def_arg_stmt;
4678   vec<tree> vec_oprnds0 = vNULL;
4679   vec<tree> vec_oprnds1 = vNULL;
4680   vec<tree> vect_defs = vNULL;
4681   vec<gimple> phis = vNULL;
4682   int vec_num;
4683   tree def0, def1, tem, op0, op1 = NULL_TREE;
4684
4685   /* In case of reduction chain we switch to the first stmt in the chain, but
4686      we don't update STMT_INFO, since only the last stmt is marked as reduction
4687      and has reduction properties.  */
4688   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4689     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4690
4691   if (nested_in_vect_loop_p (loop, stmt))
4692     {
4693       outer_loop = loop;
4694       loop = loop->inner;
4695       nested_cycle = true;
4696     }
4697
4698   /* 1. Is vectorizable reduction?  */
4699   /* Not supportable if the reduction variable is used in the loop, unless
4700      it's a reduction chain.  */
4701   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4702       && !GROUP_FIRST_ELEMENT (stmt_info))
4703     return false;
4704
4705   /* Reductions that are not used even in an enclosing outer-loop,
4706      are expected to be "live" (used out of the loop).  */
4707   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4708       && !STMT_VINFO_LIVE_P (stmt_info))
4709     return false;
4710
4711   /* Make sure it was already recognized as a reduction computation.  */
4712   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4713       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4714     return false;
4715
4716   /* 2. Has this been recognized as a reduction pattern?
4717
4718      Check if STMT represents a pattern that has been recognized
4719      in earlier analysis stages.  For stmts that represent a pattern,
4720      the STMT_VINFO_RELATED_STMT field records the last stmt in
4721      the original sequence that constitutes the pattern.  */
4722
4723   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4724   if (orig_stmt)
4725     {
4726       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4727       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4728       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4729     }
4730
4731   /* 3. Check the operands of the operation.  The first operands are defined
4732         inside the loop body. The last operand is the reduction variable,
4733         which is defined by the loop-header-phi.  */
4734
4735   gcc_assert (is_gimple_assign (stmt));
4736
4737   /* Flatten RHS.  */
4738   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4739     {
4740     case GIMPLE_SINGLE_RHS:
4741       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4742       if (op_type == ternary_op)
4743         {
4744           tree rhs = gimple_assign_rhs1 (stmt);
4745           ops[0] = TREE_OPERAND (rhs, 0);
4746           ops[1] = TREE_OPERAND (rhs, 1);
4747           ops[2] = TREE_OPERAND (rhs, 2);
4748           code = TREE_CODE (rhs);
4749         }
4750       else
4751         return false;
4752       break;
4753
4754     case GIMPLE_BINARY_RHS:
4755       code = gimple_assign_rhs_code (stmt);
4756       op_type = TREE_CODE_LENGTH (code);
4757       gcc_assert (op_type == binary_op);
4758       ops[0] = gimple_assign_rhs1 (stmt);
4759       ops[1] = gimple_assign_rhs2 (stmt);
4760       break;
4761
4762     case GIMPLE_TERNARY_RHS:
4763       code = gimple_assign_rhs_code (stmt);
4764       op_type = TREE_CODE_LENGTH (code);
4765       gcc_assert (op_type == ternary_op);
4766       ops[0] = gimple_assign_rhs1 (stmt);
4767       ops[1] = gimple_assign_rhs2 (stmt);
4768       ops[2] = gimple_assign_rhs3 (stmt);
4769       break;
4770
4771     case GIMPLE_UNARY_RHS:
4772       return false;
4773
4774     default:
4775       gcc_unreachable ();
4776     }
4777
4778   if (code == COND_EXPR && slp_node)
4779     return false;
4780
4781   scalar_dest = gimple_assign_lhs (stmt);
4782   scalar_type = TREE_TYPE (scalar_dest);
4783   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4784       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4785     return false;
4786
4787   /* Do not try to vectorize bit-precision reductions.  */
4788   if ((TYPE_PRECISION (scalar_type)
4789        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4790     return false;
4791
4792   /* All uses but the last are expected to be defined in the loop.
4793      The last use is the reduction variable.  In case of nested cycle this
4794      assumption is not true: we use reduc_index to record the index of the
4795      reduction variable.  */
4796   for (i = 0; i < op_type - 1; i++)
4797     {
4798       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4799       if (i == 0 && code == COND_EXPR)
4800         continue;
4801
4802       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4803                                             &def_stmt, &def, &dt, &tem);
4804       if (!vectype_in)
4805         vectype_in = tem;
4806       gcc_assert (is_simple_use);
4807
4808       if (dt != vect_internal_def
4809           && dt != vect_external_def
4810           && dt != vect_constant_def
4811           && dt != vect_induction_def
4812           && !(dt == vect_nested_cycle && nested_cycle))
4813         return false;
4814
4815       if (dt == vect_nested_cycle)
4816         {
4817           found_nested_cycle_def = true;
4818           reduc_def_stmt = def_stmt;
4819           reduc_index = i;
4820         }
4821     }
4822
4823   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4824                                         &def_stmt, &def, &dt, &tem);
4825   if (!vectype_in)
4826     vectype_in = tem;
4827   gcc_assert (is_simple_use);
4828   if (!(dt == vect_reduction_def
4829         || dt == vect_nested_cycle
4830         || ((dt == vect_internal_def || dt == vect_external_def
4831              || dt == vect_constant_def || dt == vect_induction_def)
4832             && nested_cycle && found_nested_cycle_def)))
4833     {
4834       /* For pattern recognized stmts, orig_stmt might be a reduction,
4835          but some helper statements for the pattern might not, or
4836          might be COND_EXPRs with reduction uses in the condition.  */
4837       gcc_assert (orig_stmt);
4838       return false;
4839     }
4840   if (!found_nested_cycle_def)
4841     reduc_def_stmt = def_stmt;
4842
4843   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4844   if (orig_stmt)
4845     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4846                                                        reduc_def_stmt,
4847                                                        !nested_cycle,
4848                                                        &dummy));
4849   else
4850     {
4851       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4852                                              !nested_cycle, &dummy);
4853       /* We changed STMT to be the first stmt in reduction chain, hence we
4854          check that in this case the first element in the chain is STMT.  */
4855       gcc_assert (stmt == tmp
4856                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4857     }
4858
4859   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4860     return false;
4861
4862   if (slp_node || PURE_SLP_STMT (stmt_info))
4863     ncopies = 1;
4864   else
4865     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4866                / TYPE_VECTOR_SUBPARTS (vectype_in));
4867
4868   gcc_assert (ncopies >= 1);
4869
4870   vec_mode = TYPE_MODE (vectype_in);
4871
4872   if (code == COND_EXPR)
4873     {
4874       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4875         {
4876           if (dump_enabled_p ())
4877             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4878                              "unsupported condition in reduction\n");
4879
4880             return false;
4881         }
4882     }
4883   else
4884     {
4885       /* 4. Supportable by target?  */
4886
4887       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4888           || code == LROTATE_EXPR || code == RROTATE_EXPR)
4889         {
4890           /* Shifts and rotates are only supported by vectorizable_shifts,
4891              not vectorizable_reduction.  */
4892           if (dump_enabled_p ())
4893             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4894                              "unsupported shift or rotation.\n");
4895           return false;
4896         }
4897
4898       /* 4.1. check support for the operation in the loop  */
4899       optab = optab_for_tree_code (code, vectype_in, optab_default);
4900       if (!optab)
4901         {
4902           if (dump_enabled_p ())
4903             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4904                              "no optab.\n");
4905
4906           return false;
4907         }
4908
4909       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4910         {
4911           if (dump_enabled_p ())
4912             dump_printf (MSG_NOTE, "op not supported by target.\n");
4913
4914           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4915               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4916                   < vect_min_worthwhile_factor (code))
4917             return false;
4918
4919           if (dump_enabled_p ())
4920             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
4921         }
4922
4923       /* Worthwhile without SIMD support?  */
4924       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4925           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4926              < vect_min_worthwhile_factor (code))
4927         {
4928           if (dump_enabled_p ())
4929             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4930                              "not worthwhile without SIMD support.\n");
4931
4932           return false;
4933         }
4934     }
4935
4936   /* 4.2. Check support for the epilog operation.
4937
4938           If STMT represents a reduction pattern, then the type of the
4939           reduction variable may be different than the type of the rest
4940           of the arguments.  For example, consider the case of accumulation
4941           of shorts into an int accumulator; The original code:
4942                         S1: int_a = (int) short_a;
4943           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4944
4945           was replaced with:
4946                         STMT: int_acc = widen_sum <short_a, int_acc>
4947
4948           This means that:
4949           1. The tree-code that is used to create the vector operation in the
4950              epilog code (that reduces the partial results) is not the
4951              tree-code of STMT, but is rather the tree-code of the original
4952              stmt from the pattern that STMT is replacing.  I.e, in the example
4953              above we want to use 'widen_sum' in the loop, but 'plus' in the
4954              epilog.
4955           2. The type (mode) we use to check available target support
4956              for the vector operation to be created in the *epilog*, is
4957              determined by the type of the reduction variable (in the example
4958              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4959              However the type (mode) we use to check available target support
4960              for the vector operation to be created *inside the loop*, is
4961              determined by the type of the other arguments to STMT (in the
4962              example we'd check this: optab_handler (widen_sum_optab,
4963              vect_short_mode)).
4964
4965           This is contrary to "regular" reductions, in which the types of all
4966           the arguments are the same as the type of the reduction variable.
4967           For "regular" reductions we can therefore use the same vector type
4968           (and also the same tree-code) when generating the epilog code and
4969           when generating the code inside the loop.  */
4970
4971   if (orig_stmt)
4972     {
4973       /* This is a reduction pattern: get the vectype from the type of the
4974          reduction variable, and get the tree-code from orig_stmt.  */
4975       orig_code = gimple_assign_rhs_code (orig_stmt);
4976       gcc_assert (vectype_out);
4977       vec_mode = TYPE_MODE (vectype_out);
4978     }
4979   else
4980     {
4981       /* Regular reduction: use the same vectype and tree-code as used for
4982          the vector code inside the loop can be used for the epilog code. */
4983       orig_code = code;
4984     }
4985
4986   if (nested_cycle)
4987     {
4988       def_bb = gimple_bb (reduc_def_stmt);
4989       def_stmt_loop = def_bb->loop_father;
4990       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4991                                        loop_preheader_edge (def_stmt_loop));
4992       if (TREE_CODE (def_arg) == SSA_NAME
4993           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4994           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4995           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4996           && vinfo_for_stmt (def_arg_stmt)
4997           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4998               == vect_double_reduction_def)
4999         double_reduc = true;
5000     }
5001
5002   epilog_reduc_code = ERROR_MARK;
5003   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5004     {
5005       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5006                                          optab_default);
5007       if (!reduc_optab)
5008         {
5009           if (dump_enabled_p ())
5010             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5011                              "no optab for reduction.\n");
5012
5013           epilog_reduc_code = ERROR_MARK;
5014         }
5015
5016       if (reduc_optab
5017           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5018         {
5019           if (dump_enabled_p ())
5020             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5021                              "reduc op not supported by target.\n");
5022
5023           epilog_reduc_code = ERROR_MARK;
5024         }
5025     }
5026   else
5027     {
5028       if (!nested_cycle || double_reduc)
5029         {
5030           if (dump_enabled_p ())
5031             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5032                              "no reduc code for scalar code.\n");
5033
5034           return false;
5035         }
5036     }
5037
5038   if (double_reduc && ncopies > 1)
5039     {
5040       if (dump_enabled_p ())
5041         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5042                          "multiple types in double reduction\n");
5043
5044       return false;
5045     }
5046
5047   /* In case of widenning multiplication by a constant, we update the type
5048      of the constant to be the type of the other operand.  We check that the
5049      constant fits the type in the pattern recognition pass.  */
5050   if (code == DOT_PROD_EXPR
5051       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5052     {
5053       if (TREE_CODE (ops[0]) == INTEGER_CST)
5054         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5055       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5056         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5057       else
5058         {
5059           if (dump_enabled_p ())
5060             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5061                              "invalid types in dot-prod\n");
5062
5063           return false;
5064         }
5065     }
5066
5067   if (!vec_stmt) /* transformation not required.  */
5068     {
5069       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5070         return false;
5071       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5072       return true;
5073     }
5074
5075   /** Transform.  **/
5076
5077   if (dump_enabled_p ())
5078     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5079
5080   /* FORNOW: Multiple types are not supported for condition.  */
5081   if (code == COND_EXPR)
5082     gcc_assert (ncopies == 1);
5083
5084   /* Create the destination vector  */
5085   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5086
5087   /* In case the vectorization factor (VF) is bigger than the number
5088      of elements that we can fit in a vectype (nunits), we have to generate
5089      more than one vector stmt - i.e - we need to "unroll" the
5090      vector stmt by a factor VF/nunits.  For more details see documentation
5091      in vectorizable_operation.  */
5092
5093   /* If the reduction is used in an outer loop we need to generate
5094      VF intermediate results, like so (e.g. for ncopies=2):
5095         r0 = phi (init, r0)
5096         r1 = phi (init, r1)
5097         r0 = x0 + r0;
5098         r1 = x1 + r1;
5099     (i.e. we generate VF results in 2 registers).
5100     In this case we have a separate def-use cycle for each copy, and therefore
5101     for each copy we get the vector def for the reduction variable from the
5102     respective phi node created for this copy.
5103
5104     Otherwise (the reduction is unused in the loop nest), we can combine
5105     together intermediate results, like so (e.g. for ncopies=2):
5106         r = phi (init, r)
5107         r = x0 + r;
5108         r = x1 + r;
5109    (i.e. we generate VF/2 results in a single register).
5110    In this case for each copy we get the vector def for the reduction variable
5111    from the vectorized reduction operation generated in the previous iteration.
5112   */
5113
5114   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5115     {
5116       single_defuse_cycle = true;
5117       epilog_copies = 1;
5118     }
5119   else
5120     epilog_copies = ncopies;
5121
5122   prev_stmt_info = NULL;
5123   prev_phi_info = NULL;
5124   if (slp_node)
5125     {
5126       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5127       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5128                   == TYPE_VECTOR_SUBPARTS (vectype_in));
5129     }
5130   else
5131     {
5132       vec_num = 1;
5133       vec_oprnds0.create (1);
5134       if (op_type == ternary_op)
5135         vec_oprnds1.create (1);
5136     }
5137
5138   phis.create (vec_num);
5139   vect_defs.create (vec_num);
5140   if (!slp_node)
5141     vect_defs.quick_push (NULL_TREE);
5142
5143   for (j = 0; j < ncopies; j++)
5144     {
5145       if (j == 0 || !single_defuse_cycle)
5146         {
5147           for (i = 0; i < vec_num; i++)
5148             {
5149               /* Create the reduction-phi that defines the reduction
5150                  operand.  */
5151               new_phi = create_phi_node (vec_dest, loop->header);
5152               set_vinfo_for_stmt (new_phi,
5153                                   new_stmt_vec_info (new_phi, loop_vinfo,
5154                                                      NULL));
5155                if (j == 0 || slp_node)
5156                  phis.quick_push (new_phi);
5157             }
5158         }
5159
5160       if (code == COND_EXPR)
5161         {
5162           gcc_assert (!slp_node);
5163           vectorizable_condition (stmt, gsi, vec_stmt,
5164                                   PHI_RESULT (phis[0]),
5165                                   reduc_index, NULL);
5166           /* Multiple types are not supported for condition.  */
5167           break;
5168         }
5169
5170       /* Handle uses.  */
5171       if (j == 0)
5172         {
5173           op0 = ops[!reduc_index];
5174           if (op_type == ternary_op)
5175             {
5176               if (reduc_index == 0)
5177                 op1 = ops[2];
5178               else
5179                 op1 = ops[1];
5180             }
5181
5182           if (slp_node)
5183             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5184                                slp_node, -1);
5185           else
5186             {
5187               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5188                                                             stmt, NULL);
5189               vec_oprnds0.quick_push (loop_vec_def0);
5190               if (op_type == ternary_op)
5191                {
5192                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5193                                                                NULL);
5194                  vec_oprnds1.quick_push (loop_vec_def1);
5195                }
5196             }
5197         }
5198       else
5199         {
5200           if (!slp_node)
5201             {
5202               enum vect_def_type dt;
5203               gimple dummy_stmt;
5204               tree dummy;
5205
5206               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5207                                   &dummy_stmt, &dummy, &dt);
5208               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5209                                                               loop_vec_def0);
5210               vec_oprnds0[0] = loop_vec_def0;
5211               if (op_type == ternary_op)
5212                 {
5213                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5214                                       &dummy, &dt);
5215                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5216                                                                 loop_vec_def1);
5217                   vec_oprnds1[0] = loop_vec_def1;
5218                 }
5219             }
5220
5221           if (single_defuse_cycle)
5222             reduc_def = gimple_assign_lhs (new_stmt);
5223
5224           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5225         }
5226
5227       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5228         {
5229           if (slp_node)
5230             reduc_def = PHI_RESULT (phis[i]);
5231           else
5232             {
5233               if (!single_defuse_cycle || j == 0)
5234                 reduc_def = PHI_RESULT (new_phi);
5235             }
5236
5237           def1 = ((op_type == ternary_op)
5238                   ? vec_oprnds1[i] : NULL);
5239           if (op_type == binary_op)
5240             {
5241               if (reduc_index == 0)
5242                 expr = build2 (code, vectype_out, reduc_def, def0);
5243               else
5244                 expr = build2 (code, vectype_out, def0, reduc_def);
5245             }
5246           else
5247             {
5248               if (reduc_index == 0)
5249                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5250               else
5251                 {
5252                   if (reduc_index == 1)
5253                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5254                   else
5255                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5256                 }
5257             }
5258
5259           new_stmt = gimple_build_assign (vec_dest, expr);
5260           new_temp = make_ssa_name (vec_dest, new_stmt);
5261           gimple_assign_set_lhs (new_stmt, new_temp);
5262           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5263
5264           if (slp_node)
5265             {
5266               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5267               vect_defs.quick_push (new_temp);
5268             }
5269           else
5270             vect_defs[0] = new_temp;
5271         }
5272
5273       if (slp_node)
5274         continue;
5275
5276       if (j == 0)
5277         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5278       else
5279         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5280
5281       prev_stmt_info = vinfo_for_stmt (new_stmt);
5282       prev_phi_info = vinfo_for_stmt (new_phi);
5283     }
5284
5285   /* Finalize the reduction-phi (set its arguments) and create the
5286      epilog reduction code.  */
5287   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5288     {
5289       new_temp = gimple_assign_lhs (*vec_stmt);
5290       vect_defs[0] = new_temp;
5291     }
5292
5293   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5294                                     epilog_reduc_code, phis, reduc_index,
5295                                     double_reduc, slp_node);
5296
5297   phis.release ();
5298   vect_defs.release ();
5299   vec_oprnds0.release ();
5300   vec_oprnds1.release ();
5301
5302   return true;
5303 }
5304
5305 /* Function vect_min_worthwhile_factor.
5306
5307    For a loop where we could vectorize the operation indicated by CODE,
5308    return the minimum vectorization factor that makes it worthwhile
5309    to use generic vectors.  */
5310 int
5311 vect_min_worthwhile_factor (enum tree_code code)
5312 {
5313   switch (code)
5314     {
5315     case PLUS_EXPR:
5316     case MINUS_EXPR:
5317     case NEGATE_EXPR:
5318       return 4;
5319
5320     case BIT_AND_EXPR:
5321     case BIT_IOR_EXPR:
5322     case BIT_XOR_EXPR:
5323     case BIT_NOT_EXPR:
5324       return 2;
5325
5326     default:
5327       return INT_MAX;
5328     }
5329 }
5330
5331
5332 /* Function vectorizable_induction
5333
5334    Check if PHI performs an induction computation that can be vectorized.
5335    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5336    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5337    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5338
5339 bool
5340 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5341                         gimple *vec_stmt)
5342 {
5343   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5344   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5345   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5346   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5347   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5348   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5349   tree vec_def;
5350
5351   gcc_assert (ncopies >= 1);
5352   /* FORNOW. These restrictions should be relaxed.  */
5353   if (nested_in_vect_loop_p (loop, phi))
5354     {
5355       imm_use_iterator imm_iter;
5356       use_operand_p use_p;
5357       gimple exit_phi;
5358       edge latch_e;
5359       tree loop_arg;
5360
5361       if (ncopies > 1)
5362         {
5363           if (dump_enabled_p ())
5364             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365                              "multiple types in nested loop.\n");
5366           return false;
5367         }
5368
5369       exit_phi = NULL;
5370       latch_e = loop_latch_edge (loop->inner);
5371       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5372       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5373         {
5374           if (!flow_bb_inside_loop_p (loop->inner,
5375                                       gimple_bb (USE_STMT (use_p))))
5376             {
5377               exit_phi = USE_STMT (use_p);
5378               break;
5379             }
5380         }
5381       if (exit_phi)
5382         {
5383           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5384           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5385                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5386             {
5387               if (dump_enabled_p ())
5388                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5389                                  "inner-loop induction only used outside "
5390                                  "of the outer vectorized loop.\n");
5391               return false;
5392             }
5393         }
5394     }
5395
5396   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5397     return false;
5398
5399   /* FORNOW: SLP not supported.  */
5400   if (STMT_SLP_TYPE (stmt_info))
5401     return false;
5402
5403   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5404
5405   if (gimple_code (phi) != GIMPLE_PHI)
5406     return false;
5407
5408   if (!vec_stmt) /* transformation not required.  */
5409     {
5410       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5411       if (dump_enabled_p ())
5412         dump_printf_loc (MSG_NOTE, vect_location,
5413                          "=== vectorizable_induction ===\n");
5414       vect_model_induction_cost (stmt_info, ncopies);
5415       return true;
5416     }
5417
5418   /** Transform.  **/
5419
5420   if (dump_enabled_p ())
5421     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5422
5423   vec_def = get_initial_def_for_induction (phi);
5424   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5425   return true;
5426 }
5427
5428 /* Function vectorizable_live_operation.
5429
5430    STMT computes a value that is used outside the loop.  Check if
5431    it can be supported.

        Return true when STMT has one of the two supported forms:
        - an IFN_GOMP_SIMD_LANE internal call on the loop's simduid whose
          result is used by a PHI in the destination block of the loop's
          single exit edge; when VEC_STMT is non-NULL that PHI argument is
          replaced by the vectorization factor minus one;
        - an assignment to an SSA name for which nested_in_vect_loop_p does
          not hold and whose operands are all constants or external defs,
          so that the scalar stmt can remain in place.
        Return false otherwise; in particular reductions are rejected here.
        GSI is unused.  */
5432
5433 bool
5434 vectorizable_live_operation (gimple stmt,
5435                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5436                              gimple *vec_stmt)
5437 {
5438   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5439   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5440   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5441   int i;
5442   int op_type;
5443   tree op;
5444   tree def;
5445   gimple def_stmt;
5446   enum vect_def_type dt;
5447   enum tree_code code;
5448   enum gimple_rhs_class rhs_class;
5449
5450   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5451
5452   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5453     return false;
5454
5455   if (!is_gimple_assign (stmt))
5456     {
5457       if (gimple_call_internal_p (stmt)
5458           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5459           && gimple_call_lhs (stmt)
5460           && loop->simduid
5461           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5462           && loop->simduid
5463              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5464         {
5465           edge e = single_exit (loop);
5466           basic_block merge_bb = e->dest;
5467           imm_use_iterator imm_iter;
5468           use_operand_p use_p;
5469           tree lhs = gimple_call_lhs (stmt);
5470
5471           FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5472             {
5473               gimple use_stmt = USE_STMT (use_p);
                   /* E->dest_idx indexes the arguments of PHIs in MERGE_BB only,
                      so the use must be a PHI and must be located in MERGE_BB
                      before its argument may be rewritten.  */
5474               if (gimple_code (use_stmt) == GIMPLE_PHI
5475                   && gimple_bb (use_stmt) == merge_bb)
5476                 {
5477                   if (vec_stmt)
5478                     {
5479                       tree vfm1
5480                         = build_int_cst (unsigned_type_node,
5481                                          loop_vinfo->vectorization_factor - 1);
5482                       SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5483                     }
5484                   return true;
5485                 }
5486             }
5487         }
5488
5489       return false;
5490     }
5491
5492   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5493     return false;
5494
5495   /* FORNOW. CHECKME. */
5496   if (nested_in_vect_loop_p (loop, stmt))
5497     return false;
5498
5499   code = gimple_assign_rhs_code (stmt);
5500   op_type = TREE_CODE_LENGTH (code);
5501   rhs_class = get_gimple_rhs_class (code);
5502   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5503   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5504
5505   /* FORNOW: support only if all uses are invariant.  This means
5506      that the scalar operations can remain in place, unvectorized.
5507      The original last scalar value that they compute will be used.  */
5508
5509   for (i = 0; i < op_type; i++)
5510     {
5511       if (rhs_class == GIMPLE_SINGLE_RHS)
5512         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5513       else
5514         op = gimple_op (stmt, i + 1);

           /* An absent operand has no definition to classify.  Skip it
              instead of testing DT, which would be stale or uninitialized.  */
           if (!op)
             continue;

5515       if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5516                                &dt))
5518         {
5519           if (dump_enabled_p ())
5520             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5521                              "use not simple.\n");
5522           return false;
5523         }
5524
5525       if (dt != vect_external_def && dt != vect_constant_def)
5526         return false;
5527     }
5528
5529   /* No transformation is required for the cases we currently support.  */
5530   return true;
5531 }
5532
5533 /* Reset every debug bind that lies outside LOOP and uses an SSA name
5534    defined by STMT (a PHI or an ordinary stmt).  Debug uses inside LOOP
5535    and all non-debug uses are left untouched.  */
5536
5537 static void
5538 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5539 {
5540   ssa_op_iter def_iter;
5541   imm_use_iterator use_iter;
5542   def_operand_p defp;
5543   gimple use_stmt;
5544
5545   FOR_EACH_PHI_OR_STMT_DEF (defp, stmt, def_iter, SSA_OP_DEF)
5546     {
5547       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (defp))
5548         {
5549           /* Skip anything that is not a debug stmt outside of LOOP.  */
5550           if (!is_gimple_debug (use_stmt)
5551               || flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
5552             continue;
5553
5554           /* Debug binds are the only debug stmts handled here.  */
5555           if (!gimple_debug_bind_p (use_stmt))
5556             gcc_unreachable ();
5557
5558           if (dump_enabled_p ())
5559             dump_printf_loc (MSG_NOTE, vect_location,
5560                              "killing debug use\n");
5561
5562           gimple_debug_bind_reset_value (use_stmt);
5563           update_stmt (use_stmt);
5564         }
5565     }
5566 }
5571
5572 /* Function vect_transform_loop.
5573
5574    The analysis phase has determined that the loop is vectorizable.
5575    Vectorize the loop - create vectorized stmts to replace the scalar
5576    stmts in the loop, and update the loop exit condition.

        LOOP_VINFO describes the loop to transform.  The steps, in order:
        - version the loop if versioning for alignment or alias is required;
        - peel for alignment if requested;
        - peel for the loop bound, which yields RATIO, unless the iteration
          count is a known multiple of the vectorization factor and no
          peeling for gaps is needed, in which case RATIO is niters / VF;
        - split the preheader edge;
        - for every basic block, transform induction PHIs and then the
          stmts, scheduling all SLP instances when the first SLP stmt is
          met and removing clobbers and vectorized scalar stores;
        - make the loop iterate RATIO times and divide the profile, the
          iteration upper bound and the iteration estimate by VF.
        The profitability check is handed to the first of versioning and
        the two peeling steps that runs, and is not requested again.  */
5577
5578 void
5579 vect_transform_loop (loop_vec_info loop_vinfo)
5580 {
5581   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5582   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5583   int nbbs = loop->num_nodes;
5584   gimple_stmt_iterator si;
5585   int i;
5586   tree ratio = NULL;
5587   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5588   bool grouped_store;
5589   bool slp_scheduled = false;
5590   unsigned int nunits;
5591   gimple stmt, pattern_stmt;
5592   gimple_seq pattern_def_seq = NULL;
5593   gimple_stmt_iterator pattern_def_si = gsi_none ();
5594   bool transform_pattern_stmt = false;
5595   bool check_profitability = false;
5596   int th;
5597   /* Record number of iterations before we started tampering with the profile. */
5598   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5599
5600   if (dump_enabled_p ())
5601     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5602
5603   /* If the profile is imprecise, we have a chance to fix it up.  */
5604   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5605     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5606
5607   /* Use the more conservative vectorization threshold.  If the number
5608      of iterations is constant assume the cost check has been performed
5609      by our caller.  If the threshold makes all loops profitable that
5610      run at least the vectorization factor number of times checking
5611      is pointless, too.  */
5612   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5613          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5614   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5615   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5616       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5617     {
5618       if (dump_enabled_p ())
5619         dump_printf_loc (MSG_NOTE, vect_location,
5620                          "Profitability threshold is %d loop iterations.\n",
5621                          th);
5622       check_profitability = true;
5623     }
5624
5625   /* Version the loop first, if required, so the profitability check
5626      comes first.  */
5627
5628   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5629       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5630     {
5631       vect_loop_versioning (loop_vinfo, th, check_profitability);
5632       check_profitability = false;
5633     }
5634
5635   /* Peel the loop if there are data refs with unknown alignment.
5636      Only one data ref with unknown store is allowed.  */
5637
5638   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5639     {
5640       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5641       check_profitability = false;
5642     }
5643
5644   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5645      compile time constant), or it is a constant that doesn't divide by the
5646      vectorization factor, then an epilog loop needs to be created.
5647      We therefore duplicate the loop: the original loop will be vectorized,
5648      and will compute the first (n/VF) iterations.  The second copy of the loop
5649      will remain scalar and will compute the remaining (n%VF) iterations.
5650      (VF is the vectorization factor).  */
5651
5652   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5653        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5654            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5655        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5656     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5657                                     th, check_profitability);
5658   else
5659     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5660                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5661
5662   /* 1) Make sure the loop header has exactly two entries
5663      2) Make sure we have a preheader basic block.  */
5664
5665   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5666
5667   split_edge (loop_preheader_edge (loop));
5668
5669   /* FORNOW: the vectorizer supports only loops whose body consists
5670      of one basic block (header + empty latch). When the vectorizer will
5671      support more involved loop forms, the order by which the BBs are
5672      traversed needs to be reconsidered.  */
5673
5674   for (i = 0; i < nbbs; i++)
5675     {
5676       basic_block bb = bbs[i];
5677       stmt_vec_info stmt_info;
5678       gimple phi;
5679
5680       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5681         {
5682           phi = gsi_stmt (si);
5683           if (dump_enabled_p ())
5684             {
5685               dump_printf_loc (MSG_NOTE, vect_location,
5686                                "------>vectorizing phi: ");
5687               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5688               dump_printf (MSG_NOTE, "\n");
5689             }
5690           stmt_info = vinfo_for_stmt (phi);
5691           if (!stmt_info)
5692             continue;
5693
5694           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5695             vect_loop_kill_debug_uses (loop, phi);
5696
5697           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5698               && !STMT_VINFO_LIVE_P (stmt_info))
5699             continue;
5700
5701           if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5702                 != (unsigned HOST_WIDE_INT) vectorization_factor)
5703               && dump_enabled_p ())
5704             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
5705
5706           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5707             {
5708               if (dump_enabled_p ())
5709                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
5710               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5711             }
5712         }
5713
           /* Walk the stmts of BB.  The loop header has no increment: SI is
              advanced explicitly below, and the loop keeps running while
              TRANSFORM_PATTERN_STMT is set even if SI has reached the end.  */
5714       pattern_stmt = NULL;
5715       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5716         {
5717           bool is_store;
5718
5719           if (transform_pattern_stmt)
5720             stmt = pattern_stmt;
5721           else
5722             {
5723               stmt = gsi_stmt (si);
5724               /* During vectorization remove existing clobber stmts.  */
5725               if (gimple_clobber_p (stmt))
5726                 {
5727                   unlink_stmt_vdef (stmt);
5728                   gsi_remove (&si, true);
5729                   release_defs (stmt);
5730                   continue;
5731                 }
5732             }
5733
5734           if (dump_enabled_p ())
5735             {
5736               dump_printf_loc (MSG_NOTE, vect_location,
5737                                "------>vectorizing statement: ");
5738               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5739               dump_printf (MSG_NOTE, "\n");
5740             }
5741
5742           stmt_info = vinfo_for_stmt (stmt);
5743
5744           /* vector stmts created in the outer-loop during vectorization of
5745              stmts in an inner-loop may not have a stmt_info, and do not
5746              need to be vectorized.  */
5747           if (!stmt_info)
5748             {
5749               gsi_next (&si);
5750               continue;
5751             }
5752
5753           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5754             vect_loop_kill_debug_uses (loop, stmt);
5755
               /* If STMT is neither relevant nor live, vectorize its related
                  pattern stmt instead when that one is relevant or live, and
                  skip STMT otherwise.  If STMT is relevant or live and also
                  has such a pattern stmt, vectorize STMT now and set
                  TRANSFORM_PATTERN_STMT so that the next iteration handles
                  the pattern stmt before SI is advanced.  */
5756           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5757               && !STMT_VINFO_LIVE_P (stmt_info))
5758             {
5759               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5760                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5761                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5762                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5763                 {
5764                   stmt = pattern_stmt;
5765                   stmt_info = vinfo_for_stmt (stmt);
5766                 }
5767               else
5768                 {
5769                   gsi_next (&si);
5770                   continue;
5771                 }
5772             }
5773           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5774                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5775                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5776                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5777             transform_pattern_stmt = true;
5778
5779           /* If pattern statement has def stmts, vectorize them too.  */
5780           if (is_pattern_stmt_p (stmt_info))
5781             {
5782               if (pattern_def_seq == NULL)
5783                 {
5784                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5785                   pattern_def_si = gsi_start (pattern_def_seq);
5786                 }
5787               else if (!gsi_end_p (pattern_def_si))
5788                 gsi_next (&pattern_def_si);
5789               if (pattern_def_seq != NULL)
5790                 {
5791                   gimple pattern_def_stmt = NULL;
5792                   stmt_vec_info pattern_def_stmt_info = NULL;
5793
                       /* Find the next relevant or live stmt of the def sequence.  */
5794                   while (!gsi_end_p (pattern_def_si))
5795                     {
5796                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5797                       pattern_def_stmt_info
5798                         = vinfo_for_stmt (pattern_def_stmt);
5799                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5800                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5801                         break;
5802                       gsi_next (&pattern_def_si);
5803                     }
5804
5805                   if (!gsi_end_p (pattern_def_si))
5806                     {
5807                       if (dump_enabled_p ())
5808                         {
5809                           dump_printf_loc (MSG_NOTE, vect_location,
5810                                            "==> vectorizing pattern def "
5811                                            "stmt: ");
5812                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5813                                             pattern_def_stmt, 0);
5814                           dump_printf (MSG_NOTE, "\n");
5815                         }
5816
5817                       stmt = pattern_def_stmt;
5818                       stmt_info = pattern_def_stmt_info;
5819                     }
5820                   else
5821                     {
5822                       pattern_def_si = gsi_none ();
5823                       transform_pattern_stmt = false;
5824                     }
5825                 }
5826               else
5827                 transform_pattern_stmt = false;
5828             }
5829
5830           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5831           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5832                                                STMT_VINFO_VECTYPE (stmt_info));
5833           if (!STMT_SLP_TYPE (stmt_info)
5834               && nunits != (unsigned int) vectorization_factor
5835               && dump_enabled_p ())
5836             /* For SLP VF is set according to unrolling factor, and not to
5837                vector size, hence for SLP this print is not valid.  */
5838             dump_printf_loc (MSG_NOTE, vect_location,
5839                              "multiple-types.\n");
5840
5841           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5842              reached.  */
5843           if (STMT_SLP_TYPE (stmt_info))
5844             {
5845               if (!slp_scheduled)
5846                 {
5847                   slp_scheduled = true;
5848
5849                   if (dump_enabled_p ())
5850                     dump_printf_loc (MSG_NOTE, vect_location,
5851                                      "=== scheduling SLP instances ===\n");
5852
5853                   vect_schedule_slp (loop_vinfo, NULL);
5854                 }
5855
5856               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5857               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5858                 {
5859                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5860                     {
5861                       pattern_def_seq = NULL;
5862                       gsi_next (&si);
5863                     }
5864                   continue;
5865                 }
5866             }
5867
5868           /* -------- vectorize statement ------------ */
5869           if (dump_enabled_p ())
5870             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
5871
5872           grouped_store = false;
5873           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5874           if (is_store)
5875             {
5876               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5877                 {
5878                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5879                      interleaving chain was completed - free all the stores in
5880                      the chain.  */
5881                   gsi_next (&si);
5882                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5883                   continue;
5884                 }
5885               else
5886                 {
5887                   /* Free the attached stmt_vec_info and remove the stmt.  */
5888                   gimple store = gsi_stmt (si);
5889                   free_stmt_vec_info (store);
5890                   unlink_stmt_vdef (store);
5891                   gsi_remove (&si, true);
5892                   release_defs (store);
5893                   continue;
5894                 }
5895             }
5896
               /* Move on to the next scalar stmt only when no pattern stmt is
                  pending and the pattern def sequence iterator is at its end.  */
5897           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5898             {
5899               pattern_def_seq = NULL;
5900               gsi_next (&si);
5901             }
5902         }                       /* stmts in BB */
5903     }                           /* BBs in loop */
5904
5905   slpeel_make_loop_iterate_ntimes (loop, ratio);
5906
5907   /* Reduce loop iterations by the vectorization factor.  */
5908   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
5909                       expected_iterations / vectorization_factor);
5910   loop->nb_iterations_upper_bound
5911     = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5912                                             FLOOR_DIV_EXPR);
       /* With peeling for gaps a non-zero bound is further decreased by one.  */
5913   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5914       && loop->nb_iterations_upper_bound != double_int_zero)
5915     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
       /* Apply the same adjustment to the iteration estimate, if there is one.  */
5916   if (loop->any_estimate)
5917     {
5918       loop->nb_iterations_estimate
5919         = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5920                                              FLOOR_DIV_EXPR);
5921        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5922            && loop->nb_iterations_estimate != double_int_zero)
5923          loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5924     }
5925
5926   if (dump_enabled_p ())
5927     {
5928       dump_printf_loc (MSG_NOTE, vect_location,
5929                        "LOOP VECTORIZED\n");
5930       if (loop->inner)
5931         dump_printf_loc (MSG_NOTE, vect_location,
5932                          "OUTER LOOP VECTORIZED\n");
5933       dump_printf (MSG_NOTE, "\n");
5934     }
5935 }