gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   5    Ira Rosen <irar@il.ibm.com>
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "dumpfile.h"
  27 #include "tm.h"
  28 #include "ggc.h"
  29 #include "tree.h"
  30 #include "basic-block.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-flow.h"
  33 #include "tree-pass.h"
  34 #include "cfgloop.h"
  35 #include "expr.h"
  36 #include "recog.h"
  37 #include "optabs.h"
  38 #include "params.h"
  39 #include "diagnostic-core.h"
  40 #include "tree-chrec.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "tree-vectorizer.h"
  43 #include "target.h"
  44
  45 /* Loop Vectorization Pass.
  46
  47    This pass tries to vectorize loops.
  48
  49    For example, the vectorizer transforms the following simple loop:
  50
  51         short a[N]; short b[N]; short c[N]; int i;
  52
  53         for (i=0; i<N; i++){
  54           a[i] = b[i] + c[i];
  55         }
  56
  57    as if it was manually vectorized by rewriting the source code into:
  58
  59         typedef int __attribute__((mode(V8HI))) v8hi;
  60         short a[N];  short b[N]; short c[N];   int i;
  61         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  62         v8hi va, vb, vc;
  63
  64         for (i=0; i<N/8; i++){
  65           vb = pb[i];
  66           vc = pc[i];
  67           va = vb + vc;
  68           pa[i] = va;
  69         }
  70
  71         The main entry to this pass is vectorize_loops(), in which
  72    the vectorizer applies a set of analyses on a given set of loops,
  73    followed by the actual vectorization transformation for the loops that
  74    had successfully passed the analysis phase.
  75         Throughout this pass we make a distinction between two types of
  76    data: scalars (which are represented by SSA_NAMES), and memory references
  77    ("data-refs").  These two types of data require different handling both
  78    during analysis and transformation. The types of data-refs that the
  79    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  80    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  81    accesses are required to have a simple (consecutive) access pattern.
  82
  83    Analysis phase:
  84    ===============
  85         The driver for the analysis phase is vect_analyze_loop().
  86    It applies a set of analyses, some of which rely on the scalar evolution
  87    analyzer (scev) developed by Sebastian Pop.
  88
  89         During the analysis phase the vectorizer records some information
  90    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  91    loop, as well as general information about the loop as a whole, which is
  92    recorded in a "loop_vec_info" struct attached to each loop.
  93
  94    Transformation phase:
  95    =====================
  96         The loop transformation phase scans all the stmts in the loop, and
  97    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  98    the loop that needs to be vectorized.  It inserts the vector code sequence
  99    just before the scalar stmt S, and records a pointer to the vector code
 100    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 101    attached to S).  This pointer will be used for the vectorization of following
 102    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 103    otherwise, we rely on dead code elimination for removing it.
 104
 105         For example, say stmt S1 was vectorized into stmt VS1:
 106
 107    VS1: vb = px[i];
 108    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 109    S2:  a = b;
 110
 111    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 112    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 113    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 114    resulting sequence would be:
 115
 116    VS1: vb = px[i];
 117    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 118    VS2: va = vb;
 119    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 120
 121         Operands that are not SSA_NAMEs are data-refs that appear in
 122    load/store operations (like 'x[i]' in S1), and are handled differently.
 123
 124    Target modeling:
 125    =================
 126         Currently the only target specific information that is used is the
 127    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 128    Targets that can support different sizes of vectors, for now will need
 129    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 130    flexibility will be added in the future.
 131
 132         Since we only vectorize operations whose vector form can be
 133    expressed using existing tree codes, to verify that an operation is
 134    supported, the vectorizer checks the relevant optab at the relevant
 135    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 136    the value found is CODE_FOR_nothing, then there's no target support, and
 137    we can't vectorize the stmt.
 138
 139    For additional information on this project see:
 140    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 141 */
 142
 143 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 144
 145 /* Function vect_determine_vectorization_factor
 146
 147    Determine the vectorization factor (VF).  VF is the number of data elements
 148    that are operated upon in parallel in a single iteration of the vectorized
 149    loop.  For example, when vectorizing a loop that operates on 4byte elements,
 150    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 151    elements can fit in a single vector register.
 152
 153    We currently support vectorization of loops in which all types operated upon
 154    are of the same size.  Therefore this function currently sets VF according to
 155    the size of the types operated upon, and fails if there are multiple sizes
 156    in the loop.
 157
 158    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 159    original loop:
 160         for (i=0; i<N; i++){
 161           a[i] = b[i] + c[i];
 162         }
 163
 164    vectorized loop:
 165         for (i=0; i<N; i+=VF){
 166           a[i:VF] = b[i:VF] + c[i:VF];
 167         }
 168 */
 169
 170 static bool
 171 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 172 {
 173   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 174   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 175   int nbbs = loop->num_nodes;
 176   gimple_stmt_iterator si;
 177   unsigned int vectorization_factor = 0;
 178   tree scalar_type;
 179   gimple phi;
 180   tree vectype;
 181   unsigned int nunits;
 182   stmt_vec_info stmt_info;
 183   int i;
 184   HOST_WIDE_INT dummy;
 185   gimple stmt, pattern_stmt = NULL;
 186   gimple_seq pattern_def_seq = NULL;
 187   gimple_stmt_iterator pattern_def_si = gsi_none ();
 188   bool analyze_pattern_stmt = false;
 189
 190   if (dump_kind_p (MSG_NOTE))
 191     dump_printf_loc (MSG_NOTE, vect_location,
 192                      "=== vect_determine_vectorization_factor ===");
 193
 194   for (i = 0; i < nbbs; i++)
 195     {
 196       basic_block bb = bbs[i];
 197
 198       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 199         {
 200           phi = gsi_stmt (si);
 201           stmt_info = vinfo_for_stmt (phi);
 202           if (dump_kind_p (MSG_NOTE))
 203             {
 204               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 205               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 206             }
 207
 208           gcc_assert (stmt_info);
 209
 210           if (STMT_VINFO_RELEVANT_P (stmt_info))
 211             {
 212               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 213               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 214
 215               if (dump_kind_p (MSG_NOTE))
 216                 {
 217                   dump_printf_loc (MSG_NOTE, vect_location,
 218                                    "get vectype for scalar type:  ");
 219                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 220                 }
 221
 222               vectype = get_vectype_for_scalar_type (scalar_type);
 223               if (!vectype)
 224                 {
 225                   if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 226                     {
 227                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 228                                        "not vectorized: unsupported "
 229                                        "data-type ");
 230                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 231                                          scalar_type);
 232                     }
 233                   return false;
 234                 }
 235               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 236
 237               if (dump_kind_p (MSG_NOTE))
 238                 {
 239                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 240                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 241                 }
 242
 243               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 244               if (dump_kind_p (MSG_NOTE))
 245                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
 246
 247               if (!vectorization_factor
 248                   || (nunits > vectorization_factor))
 249                 vectorization_factor = nunits;
 250             }
 251         }
 252
 253       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
 254         {
 255           tree vf_vectype;
 256
 257           if (analyze_pattern_stmt)
 258             stmt = pattern_stmt;
 259           else
 260             stmt = gsi_stmt (si);
 261
 262           stmt_info = vinfo_for_stmt (stmt);
 263
 264           if (dump_kind_p (MSG_NOTE))
 265             {
 266               dump_printf_loc (MSG_NOTE, vect_location,
 267                                "==> examining statement: ");
 268               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 269             }
 270
 271           gcc_assert (stmt_info);
 272
 273           /* Skip stmts which do not need to be vectorized.  */
 274           if (!STMT_VINFO_RELEVANT_P (stmt_info)
 275               && !STMT_VINFO_LIVE_P (stmt_info))
 276             {
 277               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 278                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 279                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 280                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 281                 {
 282                   stmt = pattern_stmt;
 283                   stmt_info = vinfo_for_stmt (pattern_stmt);
 284                   if (dump_kind_p (MSG_NOTE))
 285                     {
 286                       dump_printf_loc (MSG_NOTE, vect_location,
 287                                        "==> examining pattern statement: ");
 288                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 289                     }
 290                 }
 291               else
 292                 {
 293                   if (dump_kind_p (MSG_NOTE))
 294                     dump_printf_loc (MSG_NOTE, vect_location, "skip.");
 295                   gsi_next (&si);
 296                   continue;
 297                 }
 298             }
 299           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 300                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 301                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 302                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 303             analyze_pattern_stmt = true;
 304
 305           /* If a pattern statement has def stmts, analyze them too.  */
 306           if (is_pattern_stmt_p (stmt_info))
 307             {
 308               if (pattern_def_seq == NULL)
 309                 {
 310                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 311                   pattern_def_si = gsi_start (pattern_def_seq);
 312                 }
 313               else if (!gsi_end_p (pattern_def_si))
 314                 gsi_next (&pattern_def_si);
 315               if (pattern_def_seq != NULL)
 316                 {
 317                   gimple pattern_def_stmt = NULL;
 318                   stmt_vec_info pattern_def_stmt_info = NULL;
 319
 320                   while (!gsi_end_p (pattern_def_si))
 321                     {
 322                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 323                       pattern_def_stmt_info
 324                         = vinfo_for_stmt (pattern_def_stmt);
 325                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 326                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 327                         break;
 328                       gsi_next (&pattern_def_si);
 329                     }
 330
 331                   if (!gsi_end_p (pattern_def_si))
 332                     {
 333                       if (dump_kind_p (MSG_NOTE))
 334                         {
 335                           dump_printf_loc (MSG_NOTE, vect_location,
 336                                            "==> examining pattern def stmt: ");
 337                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 338                                             pattern_def_stmt, 0);
 339                         }
 340
 341                       stmt = pattern_def_stmt;
 342                       stmt_info = pattern_def_stmt_info;
 343                     }
 344                   else
 345                     {
 346                       pattern_def_si = gsi_none ();
 347                       analyze_pattern_stmt = false;
 348                     }
 349                 }
 350               else
 351                 analyze_pattern_stmt = false;
 352             }
 353
 354           if (gimple_get_lhs (stmt) == NULL_TREE)
 355             {
 356               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 357                 {
 358                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 359                                    "not vectorized: irregular stmt.");
 360                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 361                                     0);
 362                 }
 363               return false;
 364             }
 365
 366           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 367             {
 368               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 369                 {
 370                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 371                                    "not vectorized: vector stmt in loop:");
 372                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 373                 }
 374               return false;
 375             }
 376
 377           if (STMT_VINFO_VECTYPE (stmt_info))
 378             {
 379               /* The only case when a vectype had been already set is for stmts
 380                  that contain a dataref, or for "pattern-stmts" (stmts
 381                  generated by the vectorizer to represent/replace a certain
 382                  idiom).  */
 383               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 384                           || is_pattern_stmt_p (stmt_info)
 385                           || !gsi_end_p (pattern_def_si));
 386               vectype = STMT_VINFO_VECTYPE (stmt_info);
 387             }
 388           else
 389             {
 390               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 391               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 392               if (dump_kind_p (MSG_NOTE))
 393                 {
 394                   dump_printf_loc (MSG_NOTE, vect_location,
 395                                    "get vectype for scalar type:  ");
 396                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 397                 }
 398               vectype = get_vectype_for_scalar_type (scalar_type);
 399               if (!vectype)
 400                 {
 401                   if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 402                     {
 403                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 404                                        "not vectorized: unsupported "
 405                                        "data-type ");
 406                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 407                                          scalar_type);
 408                     }
 409                   return false;
 410                 }
 411
 412               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 413             }
 414
 415           /* The vectorization factor is according to the smallest
 416              scalar type (or the largest vector size, but we only
 417              support one vector size per loop).  */
 418           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 419                                                        &dummy);
 420           if (dump_kind_p (MSG_NOTE))
 421             {
 422               dump_printf_loc (MSG_NOTE, vect_location,
 423                                "get vectype for scalar type:  ");
 424               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 425             }
 426           vf_vectype = get_vectype_for_scalar_type (scalar_type);
 427           if (!vf_vectype)
 428             {
 429               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 430                 {
 431                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 432                                    "not vectorized: unsupported data-type ");
 433                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 434                                      scalar_type);
 435                 }
 436               return false;
 437             }
 438
 439           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 440                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 441             {
 442               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 443                 {
 444                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 445                                    "not vectorized: different sized vector "
 446                                    "types in statement, ");
 447                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 448                                      vectype);
 449                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 450                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 451                                      vf_vectype);
 452                 }
 453               return false;
 454             }
 455
 456           if (dump_kind_p (MSG_NOTE))
 457             {
 458               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 459               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 460             }
 461
 462           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 463           if (dump_kind_p (MSG_NOTE))
 464             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
 465           if (!vectorization_factor
 466               || (nunits > vectorization_factor))
 467             vectorization_factor = nunits;
 468
 469           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 470             {
 471               pattern_def_seq = NULL;
 472               gsi_next (&si);
 473             }
 474         }
 475     }
 476
 477   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 478   if (dump_kind_p (MSG_NOTE))
 479     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d",
 480                      vectorization_factor);
 481   if (vectorization_factor <= 1)
 482     {
 483       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 484         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 485                          "not vectorized: unsupported data-type");
 486       return false;
 487     }
 488   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 489
 490   return true;
 491 }
 492
 493
 494 /* Function vect_is_simple_iv_evolution.
 495
 496    FORNOW: A simple evolution of an induction variables in the loop is
 497    considered a polynomial evolution with constant step.  */
 498
 499 static bool
 500 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 501                              tree * step)
 502 {
 503   tree init_expr;
 504   tree step_expr;
 505   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 506
 507   /* When there is no evolution in this loop, the evolution function
 508      is not "simple".  */
 509   if (evolution_part == NULL_TREE)
 510     return false;
 511
 512   /* When the evolution is a polynomial of degree >= 2
 513      the evolution function is not "simple".  */
 514   if (tree_is_chrec (evolution_part))
 515     return false;
 516
 517   step_expr = evolution_part;
 518   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 519
 520   if (dump_kind_p (MSG_NOTE))
 521     {
 522       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 523       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 524       dump_printf (MSG_NOTE, ",  init: ");
 525       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 526     }
 527
 528   *init = init_expr;
 529   *step = step_expr;
 530
 531   if (TREE_CODE (step_expr) != INTEGER_CST)
 532     {
 533       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 534         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 535                          "step unknown.");
 536       return false;
 537     }
 538
 539   return true;
 540 }
 541
 542 /* Function vect_analyze_scalar_cycles_1.
 543
 544    Examine the cross iteration def-use cycles of scalar variables
 545    in LOOP.  LOOP_VINFO represents the loop that is now being
 546    considered for vectorization (can be LOOP, or an outer-loop
 547    enclosing LOOP).  */
 548
 549 static void
 550 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 551 {
 552   basic_block bb = loop->header;
 553   tree dumy;
 554   VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
 555   gimple_stmt_iterator gsi;
 556   bool double_reduc;
 557
 558   if (dump_kind_p (MSG_NOTE))
 559     dump_printf_loc (MSG_NOTE, vect_location,
 560                      "=== vect_analyze_scalar_cycles ===");
 561
 562   /* First - identify all inductions.  Reduction detection assumes that all the
 563      inductions have been identified, therefore, this order must not be
 564      changed.  */
 565   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 566     {
 567       gimple phi = gsi_stmt (gsi);
 568       tree access_fn = NULL;
 569       tree def = PHI_RESULT (phi);
 570       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 571
 572       if (dump_kind_p (MSG_NOTE))
 573         {
 574           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 575           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 576         }
 577
 578       /* Skip virtual phi's.  The data dependences that are associated with
 579          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 580       if (virtual_operand_p (def))
 581         continue;
 582
 583       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 584
 585       /* Analyze the evolution function.  */
 586       access_fn = analyze_scalar_evolution (loop, def);
 587       if (access_fn)
 588         {
 589           STRIP_NOPS (access_fn);
 590           if (dump_kind_p (MSG_NOTE))
 591             {
 592               dump_printf_loc (MSG_NOTE, vect_location,
 593                                "Access function of PHI: ");
 594               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 595             }
 596           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 597             = evolution_part_in_loop_num (access_fn, loop->num);
 598         }
 599
 600       if (!access_fn
 601           || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
 602         {
 603           VEC_safe_push (gimple, heap, worklist, phi);
 604           continue;
 605         }
 606
 607       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 608
 609       if (dump_kind_p (MSG_NOTE))
 610         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.");
 611       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 612     }
 613
 614
 615   /* Second - identify all reductions and nested cycles.  */
 616   while (VEC_length (gimple, worklist) > 0)
 617     {
 618       gimple phi = VEC_pop (gimple, worklist);
 619       tree def = PHI_RESULT (phi);
 620       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 621       gimple reduc_stmt;
 622       bool nested_cycle;
 623
 624       if (dump_kind_p (MSG_NOTE))
 625         {
 626           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 627           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 628         }
 629
 630       gcc_assert (!virtual_operand_p (def)
 631                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 632
 633       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 634       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 635                                                 &double_reduc);
 636       if (reduc_stmt)
 637         {
 638           if (double_reduc)
 639             {
 640               if (dump_kind_p (MSG_NOTE))
 641                 dump_printf_loc (MSG_NOTE, vect_location,
 642                                  "Detected double reduction.");
 643
 644               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 645               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 646                                                     vect_double_reduction_def;
 647             }
 648           else
 649             {
 650               if (nested_cycle)
 651                 {
 652                   if (dump_kind_p (MSG_NOTE))
 653                     dump_printf_loc (MSG_NOTE, vect_location,
 654                                      "Detected vectorizable nested cycle.");
 655
 656                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 657                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 658                                                              vect_nested_cycle;
 659                 }
 660               else
 661                 {
 662                   if (dump_kind_p (MSG_NOTE))
 663                     dump_printf_loc (MSG_NOTE, vect_location,
 664                                      "Detected reduction.");
 665
 666                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 667                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 668                                                            vect_reduction_def;
 669                   /* Store the reduction cycles for possible vectorization in
 670                      loop-aware SLP.  */
 671                   VEC_safe_push (gimple, heap,
 672                                  LOOP_VINFO_REDUCTIONS (loop_vinfo),
 673                                  reduc_stmt);
 674                 }
 675             }
 676         }
 677       else
 678         if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
 679           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 680                            "Unknown def-use cycle pattern.");
 681     }
 682
 683   VEC_free (gimple, heap, worklist);
 684 }
 685
 686
 687 /* Function vect_analyze_scalar_cycles.
 688
 689    Examine the cross iteration def-use cycles of scalar variables, by
 690    analyzing the loop-header PHIs of scalar variables.  Classify each
 691    cycle as one of the following: invariant, induction, reduction, unknown.
 692    We do that for the loop represented by LOOP_VINFO, and also to its
 693    inner-loop, if exists.
 694    Examples for scalar cycles:
 695
 696    Example1: reduction:
 697
 698               loop1:
 699               for (i=0; i<N; i++)
 700                  sum += a[i];
 701
 702    Example2: induction:
 703
 704               loop2:
 705               for (i=0; i<N; i++)
 706                  a[i] = i;  */
 707
 708 static void
 709 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 710 {
 711   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 712
 713   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 714
 715   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 716      Reductions in such inner-loop therefore have different properties than
 717      the reductions in the nest that gets vectorized:
 718      1. When vectorized, they are executed in the same order as in the original
 719         scalar loop, so we can't change the order of computation when
 720         vectorizing them.
 721      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 722         current checks are too strict.  */
 723
 724   if (loop->inner)
 725     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 726 }
 727
 728 /* Function vect_get_loop_niters.
 729
 730    Determine how many iterations the loop is executed.
 731    If an expression that represents the number of iterations
 732    can be constructed, place it in NUMBER_OF_ITERATIONS.
 733    Return the loop exit condition.  */
 734
 735 static gimple
 736 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 737 {
 738   tree niters;
 739
 740   if (dump_kind_p (MSG_NOTE))
 741     dump_printf_loc (MSG_NOTE, vect_location,
 742                      "=== get_loop_niters ===");
 743   niters = number_of_exit_cond_executions (loop);
 744
 745   if (niters != NULL_TREE
 746       && niters != chrec_dont_know)
 747     {
 748       *number_of_iterations = niters;
 749
 750       if (dump_kind_p (MSG_NOTE))
 751         {
 752           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
 753           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
 754         }
 755     }
 756
 757   return get_loop_exit_condition (loop);
 758 }
 759
 760
 761 /* Function bb_in_loop_p
 762
 763    Used as predicate for dfs order traversal of the loop bbs.  */
 764
 765 static bool
 766 bb_in_loop_p (const_basic_block bb, const void *data)
 767 {
 768   const struct loop *const loop = (const struct loop *)data;
 769   if (flow_bb_inside_loop_p (loop, bb))
 770     return true;
 771   return false;
 772 }
 773
 774
 775 /* Function new_loop_vec_info.
 776
 777    Create and initialize a new loop_vec_info struct for LOOP, as well as
 778    stmt_vec_info structs for all the stmts in LOOP.  */
 779
 780 static loop_vec_info
 781 new_loop_vec_info (struct loop *loop)
 782 {
 783   loop_vec_info res;
 784   basic_block *bbs;
 785   gimple_stmt_iterator si;
 786   unsigned int i, nbbs;
 787
 788   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 789   LOOP_VINFO_LOOP (res) = loop;
 790
 791   bbs = get_loop_body (loop);
 792
 793   /* Create/Update stmt_info for all stmts in the loop.  */
 794   for (i = 0; i < loop->num_nodes; i++)
 795     {
 796       basic_block bb = bbs[i];
 797
 798       /* BBs in a nested inner-loop will have been already processed (because
 799          we will have called vect_analyze_loop_form for any nested inner-loop).
 800          Therefore, for stmts in an inner-loop we just want to update the
 801          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 802          loop_info of the outer-loop we are currently considering to vectorize
 803          (instead of the loop_info of the inner-loop).
 804          For stmts in other BBs we need to create a stmt_info from scratch.  */
 805       if (bb->loop_father != loop)
 806         {
 807           /* Inner-loop bb.  */
 808           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 809           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 810             {
 811               gimple phi = gsi_stmt (si);
 812               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 813               loop_vec_info inner_loop_vinfo =
 814                 STMT_VINFO_LOOP_VINFO (stmt_info);
 815               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 816               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 817             }
 818           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 819            {
 820               gimple stmt = gsi_stmt (si);
 821               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 822               loop_vec_info inner_loop_vinfo =
 823                  STMT_VINFO_LOOP_VINFO (stmt_info);
 824               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 825               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 826            }
 827         }
 828       else
 829         {
 830           /* bb in current nest.  */
 831           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 832             {
 833               gimple phi = gsi_stmt (si);
 834               gimple_set_uid (phi, 0);
 835               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 836             }
 837
 838           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 839             {
 840               gimple stmt = gsi_stmt (si);
 841               gimple_set_uid (stmt, 0);
 842               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 843             }
 844         }
 845     }
 846
 847   /* CHECKME: We want to visit all BBs before their successors (except for
 848      latch blocks, for which this assertion wouldn't hold).  In the simple
 849      case of the loop forms we allow, a dfs order of the BBs would the same
 850      as reversed postorder traversal, so we are safe.  */
 851
 852    free (bbs);
 853    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 854    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 855                               bbs, loop->num_nodes, loop);
 856    gcc_assert (nbbs == loop->num_nodes);
 857
 858   LOOP_VINFO_BBS (res) = bbs;
 859   LOOP_VINFO_NITERS (res) = NULL;
 860   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 861   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 862   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 863   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 864   LOOP_VINFO_VECT_FACTOR (res) = 0;
 865   LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
 866   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
 867   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
 868   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 869   LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
 870     VEC_alloc (gimple, heap,
 871                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 872   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
 873     VEC_alloc (ddr_p, heap,
 874                PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 875   LOOP_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
 876   LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
 877   LOOP_VINFO_REDUCTION_CHAINS (res) = VEC_alloc (gimple, heap, 10);
 878   LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
 879   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 880   LOOP_VINFO_PEELING_HTAB (res) = NULL;
 881   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
 882   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 883   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 884
 885   return res;
 886 }
 887
 888
 889 /* Function destroy_loop_vec_info.
 890
 891    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 892    stmts in the loop.  */
 893
 894 void
 895 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 896 {
 897   struct loop *loop;
 898   basic_block *bbs;
 899   int nbbs;
 900   gimple_stmt_iterator si;
 901   int j;
 902   VEC (slp_instance, heap) *slp_instances;
 903   slp_instance instance;
 904   bool swapped;
 905
 906   if (!loop_vinfo)
 907     return;
 908
 909   loop = LOOP_VINFO_LOOP (loop_vinfo);
 910
 911   bbs = LOOP_VINFO_BBS (loop_vinfo);
 912   nbbs = loop->num_nodes;
 913   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
 914
 915   if (!clean_stmts)
 916     {
 917       free (LOOP_VINFO_BBS (loop_vinfo));
 918       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 919       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 920       VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
 921       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
 922       VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
 923
 924       free (loop_vinfo);
 925       loop->aux = NULL;
 926       return;
 927     }
 928
 929   for (j = 0; j < nbbs; j++)
 930     {
 931       basic_block bb = bbs[j];
 932       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 933         free_stmt_vec_info (gsi_stmt (si));
 934
 935       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 936         {
 937           gimple stmt = gsi_stmt (si);
 938
 939           /* We may have broken canonical form by moving a constant
 940              into RHS1 of a commutative op.  Fix such occurrences.  */
 941           if (swapped && is_gimple_assign (stmt))
 942             {
 943               enum tree_code code = gimple_assign_rhs_code (stmt);
 944
 945               if ((code == PLUS_EXPR
 946                    || code == POINTER_PLUS_EXPR
 947                    || code == MULT_EXPR)
 948                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
 949                 swap_tree_operands (stmt,
 950                                     gimple_assign_rhs1_ptr (stmt),
 951                                     gimple_assign_rhs2_ptr (stmt));
 952             }
 953
 954           /* Free stmt_vec_info.  */
 955           free_stmt_vec_info (stmt);
 956           gsi_next (&si);
 957         }
 958     }
 959
 960   free (LOOP_VINFO_BBS (loop_vinfo));
 961   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 962   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 963   VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
 964   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
 965   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
 966   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 967   FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
 968     vect_free_slp_instance (instance);
 969
 970   VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
 971   VEC_free (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo));
 972   VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
 973   VEC_free (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo));
 974
 975   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
 976     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
 977
 978   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
 979
 980   free (loop_vinfo);
 981   loop->aux = NULL;
 982 }
 983
 984
 985 /* Function vect_analyze_loop_1.
 986
 987    Apply a set of analyses on LOOP, and create a loop_vec_info struct
 988    for it. The different analyses will record information in the
 989    loop_vec_info struct.  This is a subset of the analyses applied in
 990    vect_analyze_loop, to be applied on an inner-loop nested in the loop
 991    that is now considered for (outer-loop) vectorization.  */
 992
 993 static loop_vec_info
 994 vect_analyze_loop_1 (struct loop *loop)
 995 {
 996   loop_vec_info loop_vinfo;
 997
 998   if (dump_kind_p (MSG_NOTE))
 999     dump_printf_loc (MSG_NOTE, vect_location,
1000                      "===== analyze_loop_nest_1 =====");
1001
1002   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1003
1004   loop_vinfo = vect_analyze_loop_form (loop);
1005   if (!loop_vinfo)
1006     {
1007       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1008         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1009                          "bad inner-loop form.");
1010       return NULL;
1011     }
1012
1013   return loop_vinfo;
1014 }
1015
1016
1017 /* Function vect_analyze_loop_form.
1018
1019    Verify that certain CFG restrictions hold, including:
1020    - the loop has a pre-header
1021    - the loop has a single entry and exit
1022    - the loop exit condition is simple enough, and the number of iterations
1023      can be analyzed (a countable loop).  */
1024
1025 loop_vec_info
1026 vect_analyze_loop_form (struct loop *loop)
1027 {
1028   loop_vec_info loop_vinfo;
1029   gimple loop_cond;
1030   tree number_of_iterations = NULL;
1031   loop_vec_info inner_loop_vinfo = NULL;
1032
1033   if (dump_kind_p (MSG_NOTE))
1034     dump_printf_loc (MSG_NOTE, vect_location,
1035                      "=== vect_analyze_loop_form ===");
1036
1037   /* Different restrictions apply when we are considering an inner-most loop,
1038      vs. an outer (nested) loop.
1039      (FORNOW. May want to relax some of these restrictions in the future).  */
1040
1041   if (!loop->inner)
1042     {
1043       /* Inner-most loop.  We currently require that the number of BBs is
1044          exactly 2 (the header and latch).  Vectorizable inner-most loops
1045          look like this:
1046
1047                         (pre-header)
1048                            |
1049                           header <--------+
1050                            | |            |
1051                            | +--> latch --+
1052                            |
1053                         (exit-bb)  */
1054
1055       if (loop->num_nodes != 2)
1056         {
1057           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1058             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1059                              "not vectorized: control flow in loop.");
1060           return NULL;
1061         }
1062
1063       if (empty_block_p (loop->header))
1064     {
1065           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1066             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1067                              "not vectorized: empty loop.");
1068       return NULL;
1069     }
1070     }
1071   else
1072     {
1073       struct loop *innerloop = loop->inner;
1074       edge entryedge;
1075
1076       /* Nested loop. We currently require that the loop is doubly-nested,
1077          contains a single inner loop, and the number of BBs is exactly 5.
1078          Vectorizable outer-loops look like this:
1079
1080                         (pre-header)
1081                            |
1082                           header <---+
1083                            |         |
1084                           inner-loop |
1085                            |         |
1086                           tail ------+
1087                            |
1088                         (exit-bb)
1089
1090          The inner-loop has the properties expected of inner-most loops
1091          as described above.  */
1092
1093       if ((loop->inner)->inner || (loop->inner)->next)
1094         {
1095           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1096             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1097                              "not vectorized: multiple nested loops.");
1098           return NULL;
1099         }
1100
1101       /* Analyze the inner-loop.  */
1102       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1103       if (!inner_loop_vinfo)
1104         {
1105           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1106             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1107                              "not vectorized: Bad inner loop.");
1108           return NULL;
1109         }
1110
1111       if (!expr_invariant_in_loop_p (loop,
1112                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1113         {
1114           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1115             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1116                              "not vectorized: inner-loop count not invariant.");
1117           destroy_loop_vec_info (inner_loop_vinfo, true);
1118           return NULL;
1119         }
1120
1121       if (loop->num_nodes != 5)
1122         {
1123           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1124             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1125                              "not vectorized: control flow in loop.");
1126           destroy_loop_vec_info (inner_loop_vinfo, true);
1127           return NULL;
1128         }
1129
1130       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1131       entryedge = EDGE_PRED (innerloop->header, 0);
1132       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1133         entryedge = EDGE_PRED (innerloop->header, 1);
1134
1135       if (entryedge->src != loop->header
1136           || !single_exit (innerloop)
1137           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1138         {
1139           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1140             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1141                              "not vectorized: unsupported outerloop form.");
1142           destroy_loop_vec_info (inner_loop_vinfo, true);
1143           return NULL;
1144         }
1145
1146       if (dump_kind_p (MSG_NOTE))
1147         dump_printf_loc (MSG_NOTE, vect_location,
1148                          "Considering outer-loop vectorization.");
1149     }
1150
1151   if (!single_exit (loop)
1152       || EDGE_COUNT (loop->header->preds) != 2)
1153     {
1154       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1155         {
1156           if (!single_exit (loop))
1157             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1158                              "not vectorized: multiple exits.");
1159           else if (EDGE_COUNT (loop->header->preds) != 2)
1160             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1161                              "not vectorized: too many incoming edges.");
1162         }
1163       if (inner_loop_vinfo)
1164         destroy_loop_vec_info (inner_loop_vinfo, true);
1165       return NULL;
1166     }
1167
1168   /* We assume that the loop exit condition is at the end of the loop. i.e,
1169      that the loop is represented as a do-while (with a proper if-guard
1170      before the loop if needed), where the loop header contains all the
1171      executable statements, and the latch is empty.  */
1172   if (!empty_block_p (loop->latch)
1173         || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1174     {
1175       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1176         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1177                          "not vectorized: unexpected loop form.");
1178       if (inner_loop_vinfo)
1179         destroy_loop_vec_info (inner_loop_vinfo, true);
1180       return NULL;
1181     }
1182
1183   /* Make sure there exists a single-predecessor exit bb:  */
1184   if (!single_pred_p (single_exit (loop)->dest))
1185     {
1186       edge e = single_exit (loop);
1187       if (!(e->flags & EDGE_ABNORMAL))
1188         {
1189           split_loop_exit_edge (e);
1190           if (dump_kind_p (MSG_NOTE))
1191             dump_printf (MSG_NOTE, "split exit edge.");
1192         }
1193       else
1194         {
1195           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1196             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1197                              "not vectorized: abnormal loop exit edge.");
1198           if (inner_loop_vinfo)
1199             destroy_loop_vec_info (inner_loop_vinfo, true);
1200           return NULL;
1201         }
1202     }
1203
1204   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1205   if (!loop_cond)
1206     {
1207       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1208         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1209                          "not vectorized: complicated exit condition.");
1210       if (inner_loop_vinfo)
1211         destroy_loop_vec_info (inner_loop_vinfo, true);
1212       return NULL;
1213     }
1214
1215   if (!number_of_iterations)
1216     {
1217       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1218         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1219                          "not vectorized: number of iterations cannot be "
1220                          "computed.");
1221       if (inner_loop_vinfo)
1222         destroy_loop_vec_info (inner_loop_vinfo, true);
1223       return NULL;
1224     }
1225
1226   if (chrec_contains_undetermined (number_of_iterations))
1227     {
1228       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1229             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1230                              "Infinite number of iterations.");
1231       if (inner_loop_vinfo)
1232         destroy_loop_vec_info (inner_loop_vinfo, true);
1233       return NULL;
1234     }
1235
1236   if (!NITERS_KNOWN_P (number_of_iterations))
1237     {
1238       if (dump_kind_p (MSG_NOTE))
1239         {
1240           dump_printf_loc (MSG_NOTE, vect_location,
1241                            "Symbolic number of iterations is ");
1242           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1243         }
1244     }
1245   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1246     {
1247       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1248         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1249                          "not vectorized: number of iterations = 0.");
1250       if (inner_loop_vinfo)
1251         destroy_loop_vec_info (inner_loop_vinfo, false);
1252       return NULL;
1253     }
1254
1255   loop_vinfo = new_loop_vec_info (loop);
1256   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1257   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1258
1259   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1260
1261   /* CHECKME: May want to keep it around it in the future.  */
1262   if (inner_loop_vinfo)
1263     destroy_loop_vec_info (inner_loop_vinfo, false);
1264
1265   gcc_assert (!loop->aux);
1266   loop->aux = loop_vinfo;
1267   return loop_vinfo;
1268 }
1269
1270
1271 /* Function vect_analyze_loop_operations.
1272
1273    Scan the loop stmts and make sure they are all vectorizable.  */
1274
1275 static bool
1276 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1277 {
1278   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1279   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1280   int nbbs = loop->num_nodes;
1281   gimple_stmt_iterator si;
1282   unsigned int vectorization_factor = 0;
1283   int i;
1284   gimple phi;
1285   stmt_vec_info stmt_info;
1286   bool need_to_vectorize = false;
1287   int min_profitable_iters;
1288   int min_scalar_loop_bound;
1289   unsigned int th;
1290   bool only_slp_in_loop = true, ok;
1291   HOST_WIDE_INT max_niter;
1292   HOST_WIDE_INT estimated_niter;
1293   int min_profitable_estimate;
1294
1295   if (dump_kind_p (MSG_NOTE))
1296     dump_printf_loc (MSG_NOTE, vect_location,
1297                      "=== vect_analyze_loop_operations ===");
1298
1299   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1300   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1301   if (slp)
1302     {
1303       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1304          vectorization factor of the loop is the unrolling factor required by
1305          the SLP instances.  If that unrolling factor is 1, we say, that we
1306          perform pure SLP on loop - cross iteration parallelism is not
1307          exploited.  */
1308       for (i = 0; i < nbbs; i++)
1309         {
1310           basic_block bb = bbs[i];
1311           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1312             {
1313               gimple stmt = gsi_stmt (si);
1314               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1315               gcc_assert (stmt_info);
1316               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1317                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1318                   && !PURE_SLP_STMT (stmt_info))
1319                 /* STMT needs both SLP and loop-based vectorization.  */
1320                 only_slp_in_loop = false;
1321             }
1322         }
1323
1324       if (only_slp_in_loop)
1325         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1326       else
1327         vectorization_factor = least_common_multiple (vectorization_factor,
1328                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1329
1330       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1331       if (dump_kind_p (MSG_NOTE))
1332         dump_printf_loc (MSG_NOTE, vect_location,
1333                          "Updating vectorization factor to %d ",
1334                          vectorization_factor);
1335     }
1336
1337   for (i = 0; i < nbbs; i++)
1338     {
1339       basic_block bb = bbs[i];
1340
1341       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1342         {
1343           phi = gsi_stmt (si);
1344           ok = true;
1345
1346           stmt_info = vinfo_for_stmt (phi);
1347           if (dump_kind_p (MSG_NOTE))
1348             {
1349               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1350               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1351             }
1352
1353           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1354              (i.e., a phi in the tail of the outer-loop).  */
1355           if (! is_loop_header_bb_p (bb))
1356             {
1357               /* FORNOW: we currently don't support the case that these phis
1358                  are not used in the outerloop (unless it is double reduction,
1359                  i.e., this phi is vect_reduction_def), cause this case
1360                  requires to actually do something here.  */
1361               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1362                    || STMT_VINFO_LIVE_P (stmt_info))
1363                   && STMT_VINFO_DEF_TYPE (stmt_info)
1364                      != vect_double_reduction_def)
1365                 {
1366                   if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1367                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1368                                      "Unsupported loop-closed phi in "
1369                                      "outer-loop.");
1370                   return false;
1371                 }
1372
1373               /* If PHI is used in the outer loop, we check that its operand
1374                  is defined in the inner loop.  */
1375               if (STMT_VINFO_RELEVANT_P (stmt_info))
1376                 {
1377                   tree phi_op;
1378                   gimple op_def_stmt;
1379
1380                   if (gimple_phi_num_args (phi) != 1)
1381                     return false;
1382
1383                   phi_op = PHI_ARG_DEF (phi, 0);
1384                   if (TREE_CODE (phi_op) != SSA_NAME)
1385                     return false;
1386
1387                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1388                   if (!op_def_stmt
1389                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1390                       || !vinfo_for_stmt (op_def_stmt))
1391                     return false;
1392
1393                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1394                         != vect_used_in_outer
1395                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1396                            != vect_used_in_outer_by_reduction)
1397                     return false;
1398                 }
1399
1400               continue;
1401             }
1402
1403           gcc_assert (stmt_info);
1404
1405           if (STMT_VINFO_LIVE_P (stmt_info))
1406             {
1407               /* FORNOW: not yet supported.  */
1408               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1409                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1410                                  "not vectorized: value used after loop.");
1411               return false;
1412             }
1413
1414           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1415               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1416             {
1417               /* A scalar-dependence cycle that we don't support.  */
1418               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1419                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1420                                  "not vectorized: scalar dependence cycle.");
1421               return false;
1422             }
1423
1424           if (STMT_VINFO_RELEVANT_P (stmt_info))
1425             {
1426               need_to_vectorize = true;
1427               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1428                 ok = vectorizable_induction (phi, NULL, NULL);
1429             }
1430
1431           if (!ok)
1432             {
1433               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1434                 {
1435                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1436                                    "not vectorized: relevant phi not "
1437                                    "supported: ");
1438                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1439                 }
1440               return false;
1441             }
1442         }
1443
1444       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1445         {
1446           gimple stmt = gsi_stmt (si);
1447           if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1448             return false;
1449         }
1450     } /* bbs */
1451
1452   /* All operations in the loop are either irrelevant (deal with loop
1453      control, or dead), or only used outside the loop and can be moved
1454      out of the loop (e.g. invariants, inductions).  The loop can be
1455      optimized away by scalar optimizations.  We're better off not
1456      touching this loop.  */
1457   if (!need_to_vectorize)
1458     {
1459       if (dump_kind_p (MSG_NOTE))
1460         dump_printf_loc (MSG_NOTE, vect_location,
1461                          "All the computation can be taken out of the loop.");
1462       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1463         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1464                          "not vectorized: redundant loop. no profit to "
1465                          "vectorize.");
1466       return false;
1467     }
1468
1469   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1470       && dump_kind_p (MSG_NOTE))
1471     dump_printf_loc (MSG_NOTE, vect_location,
1472                      "vectorization_factor = %d, niters = "
1473                      HOST_WIDE_INT_PRINT_DEC, vectorization_factor,
1474                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1475
1476   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1477        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1478       || ((max_niter = max_stmt_executions_int (loop)) != -1
1479           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1480     {
1481       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1482         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1483                          "not vectorized: iteration count too small.");
1484       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1485         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1486                          "not vectorized: iteration count smaller than "
1487                          "vectorization factor.");
1488       return false;
1489     }
1490
1491   /* Analyze cost.  Decide if worth while to vectorize.  */
1492
1493   /* Once VF is set, SLP costs should be updated since the number of created
1494      vector stmts depends on VF.  */
1495   vect_update_slp_costs_according_to_vf (loop_vinfo);
1496
1497   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1498                                       &min_profitable_estimate);
1499   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1500
1501   if (min_profitable_iters < 0)
1502     {
1503       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1504         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1505                          "not vectorized: vectorization not profitable.");
1506       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1507         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1508                          "not vectorized: vector version will never be "
1509                          "profitable.");
1510       return false;
1511     }
1512
1513   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1514                             * vectorization_factor) - 1);
1515
1516
1517   /* Use the cost model only if it is more conservative than user specified
1518      threshold.  */
1519
1520   th = (unsigned) min_scalar_loop_bound;
1521   if (min_profitable_iters
1522       && (!min_scalar_loop_bound
1523           || min_profitable_iters > min_scalar_loop_bound))
1524     th = (unsigned) min_profitable_iters;
1525
1526   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1527       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1528     {
1529       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1530         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1531                          "not vectorized: vectorization not profitable.");
1532       if (dump_kind_p (MSG_NOTE))
1533         dump_printf_loc (MSG_NOTE, vect_location,
1534                          "not vectorized: iteration count smaller than user "
1535                          "specified loop bound parameter or minimum profitable "
1536                          "iterations (whichever is more conservative).");
1537       return false;
1538     }
1539
1540   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1541       && ((unsigned HOST_WIDE_INT) estimated_niter
1542           <= MAX (th, (unsigned)min_profitable_estimate)))
1543     {
1544       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1545         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1546                          "not vectorized: estimated iteration count too "
1547                          "small.");
1548       if (dump_kind_p (MSG_NOTE))
1549         dump_printf_loc (MSG_NOTE, vect_location,
1550                          "not vectorized: estimated iteration count smaller "
1551                          "than specified loop bound parameter or minimum "
1552                          "profitable iterations (whichever is more "
1553                          "conservative).");
1554       return false;
1555     }
1556
1557   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1558       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1559       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1560     {
1561       if (dump_kind_p (MSG_NOTE))
1562         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.");
1563       if (!vect_can_advance_ivs_p (loop_vinfo))
1564         {
1565           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1566             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1567                              "not vectorized: can't create epilog loop 1.");
1568           return false;
1569         }
1570       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1571         {
1572           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1573             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1574                              "not vectorized: can't create epilog loop 2.");
1575           return false;
1576         }
1577     }
1578
1579   return true;
1580 }
1581
1582
1583 /* Function vect_analyze_loop_2.
1584
1585    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1586    for it.  The different analyses will record information in the
1587    loop_vec_info struct.  */
1588 static bool
1589 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1590 {
1591   bool ok, slp = false;
1592   int max_vf = MAX_VECTORIZATION_FACTOR;
1593   int min_vf = 2;
1594
1595   /* Find all data references in the loop (which correspond to vdefs/vuses)
1596      and analyze their evolution in the loop.  Also adjust the minimal
1597      vectorization factor according to the loads and stores.
1598
1599      FORNOW: Handle only simple, array references, which
1600      alignment can be forced, and aligned pointer-references.  */
1601
1602   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1603   if (!ok)
1604     {
1605       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1606         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1607                          "bad data references.");
1608       return false;
1609     }
1610
1611   /* Classify all cross-iteration scalar data-flow cycles.
1612      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1613
1614   vect_analyze_scalar_cycles (loop_vinfo);
1615
1616   vect_pattern_recog (loop_vinfo, NULL);
1617
1618   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1619
1620   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1621   if (!ok)
1622     {
1623       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1624         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1625                          "unexpected pattern.");
1626       return false;
1627     }
1628
1629   /* Analyze data dependences between the data-refs in the loop
1630      and adjust the maximum vectorization factor according to
1631      the dependences.
1632      FORNOW: fail at the first data dependence that we encounter.  */
1633
1634   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1635   if (!ok
1636       || max_vf < min_vf)
1637     {
1638       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1639             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1640                              "bad data dependence.");
1641       return false;
1642     }
1643
1644   ok = vect_determine_vectorization_factor (loop_vinfo);
1645   if (!ok)
1646     {
1647       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1648         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1649                          "can't determine vectorization factor.");
1650       return false;
1651     }
1652   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1653     {
1654       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1655         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1656                          "bad data dependence.");
1657       return false;
1658     }
1659
1660   /* Analyze the alignment of the data-refs in the loop.
1661      Fail if a data reference is found that cannot be vectorized.  */
1662
1663   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1664   if (!ok)
1665     {
1666       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1667         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1668                          "bad data alignment.");
1669       return false;
1670     }
1671
1672   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1673      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1674
1675   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1676   if (!ok)
1677     {
1678       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1679         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1680                          "bad data access.");
1681       return false;
1682     }
1683
1684   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1685      It is important to call pruning after vect_analyze_data_ref_accesses,
1686      since we use grouping information gathered by interleaving analysis.  */
1687   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1688   if (!ok)
1689     {
1690       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1691         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1692                          "too long list of versioning for alias "
1693                          "run-time tests.");
1694       return false;
1695     }
1696
1697   /* This pass will decide on using loop versioning and/or loop peeling in
1698      order to enhance the alignment of data references in the loop.  */
1699
1700   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1701   if (!ok)
1702     {
1703       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1704         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1705                          "bad data alignment.");
1706       return false;
1707     }
1708
1709   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1710   ok = vect_analyze_slp (loop_vinfo, NULL);
1711   if (ok)
1712     {
1713       /* Decide which possible SLP instances to SLP.  */
1714       slp = vect_make_slp_decision (loop_vinfo);
1715
1716       /* Find stmts that need to be both vectorized and SLPed.  */
1717       vect_detect_hybrid_slp (loop_vinfo);
1718     }
1719   else
1720     return false;
1721
1722   /* Scan all the operations in the loop and make sure they are
1723      vectorizable.  */
1724
1725   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1726   if (!ok)
1727     {
1728       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1729         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1730                          "bad operation or unsupported loop bound.");
1731       return false;
1732     }
1733
1734   return true;
1735 }
1736
1737 /* Function vect_analyze_loop.
1738
1739    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1740    for it.  The different analyses will record information in the
1741    loop_vec_info struct.  */
1742 loop_vec_info
1743 vect_analyze_loop (struct loop *loop)
1744 {
1745   loop_vec_info loop_vinfo;
1746   unsigned int vector_sizes;
1747
1748   /* Autodetect first vector size we try.  */
1749   current_vector_size = 0;
1750   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1751
1752   if (dump_kind_p (MSG_NOTE))
1753     dump_printf_loc (MSG_NOTE, vect_location,
1754                      "===== analyze_loop_nest =====");
1755
1756   if (loop_outer (loop)
1757       && loop_vec_info_for_loop (loop_outer (loop))
1758       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1759     {
1760       if (dump_kind_p (MSG_NOTE))
1761         dump_printf_loc (MSG_NOTE, vect_location,
1762                          "outer-loop already vectorized.");
1763       return NULL;
1764     }
1765
1766   while (1)
1767     {
1768       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1769       loop_vinfo = vect_analyze_loop_form (loop);
1770       if (!loop_vinfo)
1771         {
1772           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1773             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1774                              "bad loop form.");
1775           return NULL;
1776         }
1777
1778       if (vect_analyze_loop_2 (loop_vinfo))
1779         {
1780           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1781
1782           return loop_vinfo;
1783         }
1784
1785       destroy_loop_vec_info (loop_vinfo, true);
1786
1787       vector_sizes &= ~current_vector_size;
1788       if (vector_sizes == 0
1789           || current_vector_size == 0)
1790         return NULL;
1791
1792       /* Try the next biggest vector size.  */
1793       current_vector_size = 1 << floor_log2 (vector_sizes);
1794       if (dump_kind_p (MSG_NOTE))
1795         dump_printf_loc (MSG_NOTE, vect_location,
1796                          "***** Re-trying analysis with "
1797                          "vector size %d\n", current_vector_size);
1798     }
1799 }
1800
1801
1802 /* Function reduction_code_for_scalar_code
1803
1804    Input:
1805    CODE - tree_code of a reduction operations.
1806
1807    Output:
1808    REDUC_CODE - the corresponding tree-code to be used to reduce the
1809       vector of partial results into a single scalar result (which
1810       will also reside in a vector) or ERROR_MARK if the operation is
1811       a supported reduction operation, but does not have such tree-code.
1812
1813    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1814
1815 static bool
1816 reduction_code_for_scalar_code (enum tree_code code,
1817                                 enum tree_code *reduc_code)
1818 {
1819   switch (code)
1820     {
1821       case MAX_EXPR:
1822         *reduc_code = REDUC_MAX_EXPR;
1823         return true;
1824
1825       case MIN_EXPR:
1826         *reduc_code = REDUC_MIN_EXPR;
1827         return true;
1828
1829       case PLUS_EXPR:
1830         *reduc_code = REDUC_PLUS_EXPR;
1831         return true;
1832
1833       case MULT_EXPR:
1834       case MINUS_EXPR:
1835       case BIT_IOR_EXPR:
1836       case BIT_XOR_EXPR:
1837       case BIT_AND_EXPR:
1838         *reduc_code = ERROR_MARK;
1839         return true;
1840
1841       default:
1842        return false;
1843     }
1844 }
1845
1846
1847 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1848    STMT is printed with a message MSG. */
1849
1850 static void
1851 report_vect_op (int msg_type, gimple stmt, const char *msg)
1852 {
1853   dump_printf_loc (msg_type, vect_location, "%s", msg);
1854   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1855 }
1856
1857
1858 /* Detect SLP reduction of the form:
1859
1860    #a1 = phi <a5, a0>
1861    a2 = operation (a1)
1862    a3 = operation (a2)
1863    a4 = operation (a3)
1864    a5 = operation (a4)
1865
1866    #a = phi <a5>
1867
1868    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1869    FIRST_STMT is the first reduction stmt in the chain
1870    (a2 = operation (a1)).
1871
1872    Return TRUE if a reduction chain was detected.  */
1873
1874 static bool
1875 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1876 {
1877   struct loop *loop = (gimple_bb (phi))->loop_father;
1878   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1879   enum tree_code code;
1880   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1881   stmt_vec_info use_stmt_info, current_stmt_info;
1882   tree lhs;
1883   imm_use_iterator imm_iter;
1884   use_operand_p use_p;
1885   int nloop_uses, size = 0, n_out_of_loop_uses;
1886   bool found = false;
1887
1888   if (loop != vect_loop)
1889     return false;
1890
1891   lhs = PHI_RESULT (phi);
1892   code = gimple_assign_rhs_code (first_stmt);
1893   while (1)
1894     {
1895       nloop_uses = 0;
1896       n_out_of_loop_uses = 0;
1897       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1898         {
1899           gimple use_stmt = USE_STMT (use_p);
1900           if (is_gimple_debug (use_stmt))
1901             continue;
1902
1903           use_stmt = USE_STMT (use_p);
1904
1905           /* Check if we got back to the reduction phi.  */
1906           if (use_stmt == phi)
1907             {
1908               loop_use_stmt = use_stmt;
1909               found = true;
1910               break;
1911             }
1912
1913           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1914             {
1915               if (vinfo_for_stmt (use_stmt)
1916                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1917                 {
1918                   loop_use_stmt = use_stmt;
1919                   nloop_uses++;
1920                 }
1921             }
1922            else
1923              n_out_of_loop_uses++;
1924
1925            /* There are can be either a single use in the loop or two uses in
1926               phi nodes.  */
1927            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1928              return false;
1929         }
1930
1931       if (found)
1932         break;
1933
1934       /* We reached a statement with no loop uses.  */
1935       if (nloop_uses == 0)
1936         return false;
1937
1938       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1939       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1940         return false;
1941
1942       if (!is_gimple_assign (loop_use_stmt)
1943           || code != gimple_assign_rhs_code (loop_use_stmt)
1944           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1945         return false;
1946
1947       /* Insert USE_STMT into reduction chain.  */
1948       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1949       if (current_stmt)
1950         {
1951           current_stmt_info = vinfo_for_stmt (current_stmt);
1952           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1953           GROUP_FIRST_ELEMENT (use_stmt_info)
1954             = GROUP_FIRST_ELEMENT (current_stmt_info);
1955         }
1956       else
1957         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1958
1959       lhs = gimple_assign_lhs (loop_use_stmt);
1960       current_stmt = loop_use_stmt;
1961       size++;
1962    }
1963
1964   if (!found || loop_use_stmt != phi || size < 2)
1965     return false;
1966
1967   /* Swap the operands, if needed, to make the reduction operand be the second
1968      operand.  */
1969   lhs = PHI_RESULT (phi);
1970   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1971   while (next_stmt)
1972     {
1973       if (gimple_assign_rhs2 (next_stmt) == lhs)
1974         {
1975           tree op = gimple_assign_rhs1 (next_stmt);
1976           gimple def_stmt = NULL;
1977
1978           if (TREE_CODE (op) == SSA_NAME)
1979             def_stmt = SSA_NAME_DEF_STMT (op);
1980
1981           /* Check that the other def is either defined in the loop
1982              ("vect_internal_def"), or it's an induction (defined by a
1983              loop-header phi-node).  */
1984           if (def_stmt
1985               && gimple_bb (def_stmt)
1986               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1987               && (is_gimple_assign (def_stmt)
1988                   || is_gimple_call (def_stmt)
1989                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1990                            == vect_induction_def
1991                   || (gimple_code (def_stmt) == GIMPLE_PHI
1992                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1993                                   == vect_internal_def
1994                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1995             {
1996               lhs = gimple_assign_lhs (next_stmt);
1997               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1998               continue;
1999             }
2000
2001           return false;
2002         }
2003       else
2004         {
2005           tree op = gimple_assign_rhs2 (next_stmt);
2006           gimple def_stmt = NULL;
2007
2008           if (TREE_CODE (op) == SSA_NAME)
2009             def_stmt = SSA_NAME_DEF_STMT (op);
2010
2011           /* Check that the other def is either defined in the loop
2012             ("vect_internal_def"), or it's an induction (defined by a
2013             loop-header phi-node).  */
2014           if (def_stmt
2015               && gimple_bb (def_stmt)
2016               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2017               && (is_gimple_assign (def_stmt)
2018                   || is_gimple_call (def_stmt)
2019                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2020                               == vect_induction_def
2021                   || (gimple_code (def_stmt) == GIMPLE_PHI
2022                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2023                                   == vect_internal_def
2024                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2025             {
2026               if (dump_kind_p (MSG_NOTE))
2027                 {
2028                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2029                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2030                 }
2031
2032               swap_tree_operands (next_stmt,
2033                                   gimple_assign_rhs1_ptr (next_stmt),
2034                                   gimple_assign_rhs2_ptr (next_stmt));
2035               update_stmt (next_stmt);
2036
2037               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2038                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2039             }
2040           else
2041             return false;
2042         }
2043
2044       lhs = gimple_assign_lhs (next_stmt);
2045       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2046     }
2047
2048   /* Save the chain for further analysis in SLP detection.  */
2049   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2050   VEC_safe_push (gimple, heap, LOOP_VINFO_REDUCTION_CHAINS (loop_info), first);
2051   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2052
2053   return true;
2054 }
2055
2056
2057 /* Function vect_is_simple_reduction_1
2058
2059    (1) Detect a cross-iteration def-use cycle that represents a simple
2060    reduction computation.  We look for the following pattern:
2061
2062    loop_header:
2063      a1 = phi < a0, a2 >
2064      a3 = ...
2065      a2 = operation (a3, a1)
2066
2067    such that:
2068    1. operation is commutative and associative and it is safe to
2069       change the order of the computation (if CHECK_REDUCTION is true)
2070    2. no uses for a2 in the loop (a2 is used out of the loop)
2071    3. no uses of a1 in the loop besides the reduction operation
2072    4. no uses of a1 outside the loop.
2073
2074    Conditions 1,4 are tested here.
2075    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2076
2077    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2078    nested cycles, if CHECK_REDUCTION is false.
2079
2080    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2081    reductions:
2082
2083      a1 = phi < a0, a2 >
2084      inner loop (def of a3)
2085      a2 = phi < a3 >
2086
2087    If MODIFY is true it also tries to rework the code in-place to enable
2088    detection of more reduction patterns.  For the time being we rewrite
2089    "res -= RHS" into "res += -RHS" when it seems worthwhile.
2090 */
2091
2092 static gimple
2093 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2094                             bool check_reduction, bool *double_reduc,
2095                             bool modify)
2096 {
2097   struct loop *loop = (gimple_bb (phi))->loop_father;
2098   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2099   edge latch_e = loop_latch_edge (loop);
2100   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2101   gimple def_stmt, def1 = NULL, def2 = NULL;
2102   enum tree_code orig_code, code;
2103   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2104   tree type;
2105   int nloop_uses;
2106   tree name;
2107   imm_use_iterator imm_iter;
2108   use_operand_p use_p;
2109   bool phi_def;
2110
2111   *double_reduc = false;
2112
2113   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2114      otherwise, we assume outer loop vectorization.  */
2115   gcc_assert ((check_reduction && loop == vect_loop)
2116               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2117
2118   name = PHI_RESULT (phi);
2119   nloop_uses = 0;
2120   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2121     {
2122       gimple use_stmt = USE_STMT (use_p);
2123       if (is_gimple_debug (use_stmt))
2124         continue;
2125
2126       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2127         {
2128           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2129             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130                              "intermediate value used outside loop.");
2131
2132           return NULL;
2133         }
2134
2135       if (vinfo_for_stmt (use_stmt)
2136           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2137         nloop_uses++;
2138       if (nloop_uses > 1)
2139         {
2140           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2141             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2142                              "reduction used in loop.");
2143           return NULL;
2144         }
2145     }
2146
2147   if (TREE_CODE (loop_arg) != SSA_NAME)
2148     {
2149       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2150         {
2151           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2152                            "reduction: not ssa_name: ");
2153           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2154         }
2155       return NULL;
2156     }
2157
2158   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2159   if (!def_stmt)
2160     {
2161       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2162         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2163                          "reduction: no def_stmt.");
2164       return NULL;
2165     }
2166
2167   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2168     {
2169       if (dump_kind_p (MSG_NOTE))
2170         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2171       return NULL;
2172     }
2173
2174   if (is_gimple_assign (def_stmt))
2175     {
2176       name = gimple_assign_lhs (def_stmt);
2177       phi_def = false;
2178     }
2179   else
2180     {
2181       name = PHI_RESULT (def_stmt);
2182       phi_def = true;
2183     }
2184
2185   nloop_uses = 0;
2186   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2187     {
2188       gimple use_stmt = USE_STMT (use_p);
2189       if (is_gimple_debug (use_stmt))
2190         continue;
2191       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2192           && vinfo_for_stmt (use_stmt)
2193           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2194         nloop_uses++;
2195       if (nloop_uses > 1)
2196         {
2197           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2198             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2199                              "reduction used in loop.");
2200           return NULL;
2201         }
2202     }
2203
2204   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2205      defined in the inner loop.  */
2206   if (phi_def)
2207     {
2208       op1 = PHI_ARG_DEF (def_stmt, 0);
2209
2210       if (gimple_phi_num_args (def_stmt) != 1
2211           || TREE_CODE (op1) != SSA_NAME)
2212         {
2213           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2214             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2215                              "unsupported phi node definition.");
2216
2217           return NULL;
2218         }
2219
2220       def1 = SSA_NAME_DEF_STMT (op1);
2221       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2222           && loop->inner
2223           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2224           && is_gimple_assign (def1))
2225         {
2226           if (dump_kind_p (MSG_NOTE))
2227             report_vect_op (MSG_NOTE, def_stmt,
2228                             "detected double reduction: ");
2229
2230           *double_reduc = true;
2231           return def_stmt;
2232         }
2233
2234       return NULL;
2235     }
2236
2237   code = orig_code = gimple_assign_rhs_code (def_stmt);
2238
2239   /* We can handle "res -= x[i]", which is non-associative by
2240      simply rewriting this into "res += -x[i]".  Avoid changing
2241      gimple instruction for the first simple tests and only do this
2242      if we're allowed to change code at all.  */
2243   if (code == MINUS_EXPR
2244       && modify
2245       && (op1 = gimple_assign_rhs1 (def_stmt))
2246       && TREE_CODE (op1) == SSA_NAME
2247       && SSA_NAME_DEF_STMT (op1) == phi)
2248     code = PLUS_EXPR;
2249
2250   if (check_reduction
2251       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2252     {
2253       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2254         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2255                         "reduction: not commutative/associative: ");
2256       return NULL;
2257     }
2258
2259   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2260     {
2261       if (code != COND_EXPR)
2262         {
2263           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2264             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2265                             "reduction: not binary operation: ");
2266
2267           return NULL;
2268         }
2269
2270       op3 = gimple_assign_rhs1 (def_stmt);
2271       if (COMPARISON_CLASS_P (op3))
2272         {
2273           op4 = TREE_OPERAND (op3, 1);
2274           op3 = TREE_OPERAND (op3, 0);
2275         }
2276
2277       op1 = gimple_assign_rhs2 (def_stmt);
2278       op2 = gimple_assign_rhs3 (def_stmt);
2279
2280       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2281         {
2282           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2283             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2284                             "reduction: uses not ssa_names: ");
2285
2286           return NULL;
2287         }
2288     }
2289   else
2290     {
2291       op1 = gimple_assign_rhs1 (def_stmt);
2292       op2 = gimple_assign_rhs2 (def_stmt);
2293
2294       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2295         {
2296           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2297             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2298                             "reduction: uses not ssa_names: ");
2299
2300           return NULL;
2301         }
2302    }
2303
2304   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2305   if ((TREE_CODE (op1) == SSA_NAME
2306        && !types_compatible_p (type,TREE_TYPE (op1)))
2307       || (TREE_CODE (op2) == SSA_NAME
2308           && !types_compatible_p (type, TREE_TYPE (op2)))
2309       || (op3 && TREE_CODE (op3) == SSA_NAME
2310           && !types_compatible_p (type, TREE_TYPE (op3)))
2311       || (op4 && TREE_CODE (op4) == SSA_NAME
2312           && !types_compatible_p (type, TREE_TYPE (op4))))
2313     {
2314       if (dump_kind_p (MSG_NOTE))
2315         {
2316           dump_printf_loc (MSG_NOTE, vect_location,
2317                            "reduction: multiple types: operation type: ");
2318           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2319           dump_printf (MSG_NOTE, ", operands types: ");
2320           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2321                              TREE_TYPE (op1));
2322           dump_printf (MSG_NOTE, ",");
2323           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2324                              TREE_TYPE (op2));
2325           if (op3)
2326             {
2327               dump_printf (MSG_NOTE, ",");
2328               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2329                                  TREE_TYPE (op3));
2330             }
2331
2332           if (op4)
2333             {
2334               dump_printf (MSG_NOTE, ",");
2335               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2336                                  TREE_TYPE (op4));
2337             }
2338         }
2339
2340       return NULL;
2341     }
2342
2343   /* Check that it's ok to change the order of the computation.
2344      Generally, when vectorizing a reduction we change the order of the
2345      computation.  This may change the behavior of the program in some
2346      cases, so we need to check that this is ok.  One exception is when
2347      vectorizing an outer-loop: the inner-loop is executed sequentially,
2348      and therefore vectorizing reductions in the inner-loop during
2349      outer-loop vectorization is safe.  */
2350
2351   /* CHECKME: check for !flag_finite_math_only too?  */
2352   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2353       && check_reduction)
2354     {
2355       /* Changing the order of operations changes the semantics.  */
2356       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2357         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2358                         "reduction: unsafe fp math optimization: ");
2359       return NULL;
2360     }
2361   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2362            && check_reduction)
2363     {
2364       /* Changing the order of operations changes the semantics.  */
2365       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2366         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2367                         "reduction: unsafe int math optimization: ");
2368       return NULL;
2369     }
2370   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2371     {
2372       /* Changing the order of operations changes the semantics.  */
2373       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2374         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2375                         "reduction: unsafe fixed-point math optimization: ");
2376       return NULL;
2377     }
2378
2379   /* If we detected "res -= x[i]" earlier, rewrite it into
2380      "res += -x[i]" now.  If this turns out to be useless reassoc
2381      will clean it up again.  */
2382   if (orig_code == MINUS_EXPR)
2383     {
2384       tree rhs = gimple_assign_rhs2 (def_stmt);
2385       tree negrhs = copy_ssa_name (rhs, NULL);
2386       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2387                                                          rhs, NULL);
2388       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2389       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2390                                                           loop_info, NULL));
2391       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2392       gimple_assign_set_rhs2 (def_stmt, negrhs);
2393       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2394       update_stmt (def_stmt);
2395     }
2396
2397   /* Reduction is safe. We're dealing with one of the following:
2398      1) integer arithmetic and no trapv
2399      2) floating point arithmetic, and special flags permit this optimization
2400      3) nested cycle (i.e., outer loop vectorization).  */
2401   if (TREE_CODE (op1) == SSA_NAME)
2402     def1 = SSA_NAME_DEF_STMT (op1);
2403
2404   if (TREE_CODE (op2) == SSA_NAME)
2405     def2 = SSA_NAME_DEF_STMT (op2);
2406
2407   if (code != COND_EXPR
2408       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2409     {
2410       if (dump_kind_p (MSG_NOTE))
2411         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2412       return NULL;
2413     }
2414
2415   /* Check that one def is the reduction def, defined by PHI,
2416      the other def is either defined in the loop ("vect_internal_def"),
2417      or it's an induction (defined by a loop-header phi-node).  */
2418
2419   if (def2 && def2 == phi
2420       && (code == COND_EXPR
2421           || !def1 || gimple_nop_p (def1)
2422           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2423               && (is_gimple_assign (def1)
2424                   || is_gimple_call (def1)
2425                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2426                       == vect_induction_def
2427                   || (gimple_code (def1) == GIMPLE_PHI
2428                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2429                           == vect_internal_def
2430                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2431     {
2432       if (dump_kind_p (MSG_NOTE))
2433         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2434       return def_stmt;
2435     }
2436
2437   if (def1 && def1 == phi
2438       && (code == COND_EXPR
2439           || !def2 || gimple_nop_p (def2)
2440           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2441               && (is_gimple_assign (def2)
2442                   || is_gimple_call (def2)
2443                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2444                       == vect_induction_def
2445                   || (gimple_code (def2) == GIMPLE_PHI
2446                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2447                           == vect_internal_def
2448                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2449     {
2450       if (check_reduction)
2451         {
2452           /* Swap operands (just for simplicity - so that the rest of the code
2453              can assume that the reduction variable is always the last (second)
2454              argument).  */
2455           if (dump_kind_p (MSG_NOTE))
2456             report_vect_op (MSG_NOTE, def_stmt,
2457                             "detected reduction: need to swap operands: ");
2458
2459           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2460                               gimple_assign_rhs2_ptr (def_stmt));
2461
2462           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2463             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2464         }
2465       else
2466         {
2467           if (dump_kind_p (MSG_NOTE))
2468             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2469         }
2470
2471       return def_stmt;
2472     }
2473
2474   /* Try to find SLP reduction chain.  */
2475   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2476     {
2477       if (dump_kind_p (MSG_NOTE))
2478         report_vect_op (MSG_NOTE, def_stmt,
2479                         "reduction: detected reduction chain: ");
2480
2481       return def_stmt;
2482     }
2483
2484   if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2485     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2486                     "reduction: unknown pattern: ");
2487
2488   return NULL;
2489 }
2490
2491 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2492    in-place.  Arguments as there.  */
2493
2494 static gimple
2495 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2496                           bool check_reduction, bool *double_reduc)
2497 {
2498   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2499                                      double_reduc, false);
2500 }
2501
2502 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2503    in-place if it enables detection of more reductions.  Arguments
2504    as there.  */
2505
2506 gimple
2507 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2508                           bool check_reduction, bool *double_reduc)
2509 {
2510   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2511                                      double_reduc, true);
2512 }
2513
2514 /* Calculate the cost of one scalar iteration of the loop.  */
2515 int
2516 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2517 {
2518   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2519   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2520   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2521   int innerloop_iters, i, stmt_cost;
2522
2523   /* Count statements in scalar loop.  Using this as scalar cost for a single
2524      iteration for now.
2525
2526      TODO: Add outer loop support.
2527
2528      TODO: Consider assigning different costs to different scalar
2529      statements.  */
2530
2531   /* FORNOW.  */
2532   innerloop_iters = 1;
2533   if (loop->inner)
2534     innerloop_iters = 50; /* FIXME */
2535
2536   for (i = 0; i < nbbs; i++)
2537     {
2538       gimple_stmt_iterator si;
2539       basic_block bb = bbs[i];
2540
2541       if (bb->loop_father == loop->inner)
2542         factor = innerloop_iters;
2543       else
2544         factor = 1;
2545
2546       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2547         {
2548           gimple stmt = gsi_stmt (si);
2549           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2550
2551           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2552             continue;
2553
2554           /* Skip stmts that are not vectorized inside the loop.  */
2555           if (stmt_info
2556               && !STMT_VINFO_RELEVANT_P (stmt_info)
2557               && (!STMT_VINFO_LIVE_P (stmt_info)
2558                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2559               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2560             continue;
2561
2562           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2563             {
2564               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2565                stmt_cost = vect_get_stmt_cost (scalar_load);
2566              else
2567                stmt_cost = vect_get_stmt_cost (scalar_store);
2568             }
2569           else
2570             stmt_cost = vect_get_stmt_cost (scalar_stmt);
2571
2572           scalar_single_iter_cost += stmt_cost * factor;
2573         }
2574     }
2575   return scalar_single_iter_cost;
2576 }
2577
2578 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2579 int
2580 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2581                              int *peel_iters_epilogue,
2582                              int scalar_single_iter_cost,
2583                              stmt_vector_for_cost *prologue_cost_vec,
2584                              stmt_vector_for_cost *epilogue_cost_vec)
2585 {
2586   int retval = 0;
2587   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2588
2589   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2590     {
2591       *peel_iters_epilogue = vf/2;
2592       if (dump_kind_p (MSG_NOTE))
2593         dump_printf_loc (MSG_NOTE, vect_location,
2594                          "cost model: epilogue peel iters set to vf/2 "
2595                          "because loop iterations are unknown .");
2596
2597       /* If peeled iterations are known but number of scalar loop
2598          iterations are unknown, count a taken branch per peeled loop.  */
2599       retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2600                                  NULL, 0, vect_prologue);
2601     }
2602   else
2603     {
2604       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2605       peel_iters_prologue = niters < peel_iters_prologue ?
2606                             niters : peel_iters_prologue;
2607       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2608       /* If we need to peel for gaps, but no peeling is required, we have to
2609          peel VF iterations.  */
2610       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2611         *peel_iters_epilogue = vf;
2612     }
2613
2614   if (peel_iters_prologue)
2615     retval += record_stmt_cost (prologue_cost_vec,
2616                                 peel_iters_prologue * scalar_single_iter_cost,
2617                                 scalar_stmt, NULL, 0, vect_prologue);
2618   if (*peel_iters_epilogue)
2619     retval += record_stmt_cost (epilogue_cost_vec,
2620                                 *peel_iters_epilogue * scalar_single_iter_cost,
2621                                 scalar_stmt, NULL, 0, vect_epilogue);
2622   return retval;
2623 }
2624
2625 /* Function vect_estimate_min_profitable_iters
2626
2627    Return the number of iterations required for the vector version of the
2628    loop to be profitable relative to the cost of the scalar version of the
2629    loop.  */
2630
2631 static void
2632 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2633                                     int *ret_min_profitable_niters,
2634                                     int *ret_min_profitable_estimate)
2635 {
2636   int min_profitable_iters;
2637   int min_profitable_estimate;
2638   int peel_iters_prologue;
2639   int peel_iters_epilogue;
2640   unsigned vec_inside_cost = 0;
2641   int vec_outside_cost = 0;
2642   unsigned vec_prologue_cost = 0;
2643   unsigned vec_epilogue_cost = 0;
2644   int scalar_single_iter_cost = 0;
2645   int scalar_outside_cost = 0;
2646   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2647   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2648   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2649
2650   /* Cost model disabled.  */
2651   if (!flag_vect_cost_model)
2652     {
2653       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
2654       *ret_min_profitable_niters = 0;
2655       *ret_min_profitable_estimate = 0;
2656       return;
2657     }
2658
2659   /* Requires loop versioning tests to handle misalignment.  */
2660   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2661     {
2662       /*  FIXME: Make cost depend on complexity of individual check.  */
2663       unsigned len = VEC_length (gimple,
2664                                  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2665       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2666                             vect_prologue);
2667       dump_printf (MSG_NOTE,
2668                    "cost model: Adding cost of checks for loop "
2669                    "versioning to treat misalignment.\n");
2670     }
2671
2672   /* Requires loop versioning with alias checks.  */
2673   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2674     {
2675       /*  FIXME: Make cost depend on complexity of individual check.  */
2676       unsigned len = VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2677       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2678                             vect_prologue);
2679       dump_printf (MSG_NOTE,
2680                    "cost model: Adding cost of checks for loop "
2681                    "versioning aliasing.\n");
2682     }
2683
2684   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2685       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2686     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2687                           vect_prologue);
2688
2689   /* Count statements in scalar loop.  Using this as scalar cost for a single
2690      iteration for now.
2691
2692      TODO: Add outer loop support.
2693
2694      TODO: Consider assigning different costs to different scalar
2695      statements.  */
2696
2697   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2698
2699   /* Add additional cost for the peeled instructions in prologue and epilogue
2700      loop.
2701
2702      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2703      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2704
2705      TODO: Build an expression that represents peel_iters for prologue and
2706      epilogue to be used in a run-time test.  */
2707
2708   if (npeel  < 0)
2709     {
2710       peel_iters_prologue = vf/2;
2711       dump_printf (MSG_NOTE, "cost model: "
2712                    "prologue peel iters set to vf/2.");
2713
2714       /* If peeling for alignment is unknown, loop bound of main loop becomes
2715          unknown.  */
2716       peel_iters_epilogue = vf/2;
2717       dump_printf (MSG_NOTE, "cost model: "
2718                    "epilogue peel iters set to vf/2 because "
2719                    "peeling for alignment is unknown.");
2720
2721       /* If peeled iterations are unknown, count a taken branch and a not taken
2722          branch per peeled loop. Even if scalar loop iterations are known,
2723          vector iterations are not known since peeled prologue iterations are
2724          not known. Hence guards remain the same.  */
2725       (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2726                             NULL, 0, vect_prologue);
2727       (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2728                             NULL, 0, vect_prologue);
2729       /* FORNOW: Don't attempt to pass individual scalar instructions to
2730          the model; just assume linear cost for scalar iterations.  */
2731       (void) add_stmt_cost (target_cost_data,
2732                             peel_iters_prologue * scalar_single_iter_cost,
2733                             scalar_stmt, NULL, 0, vect_prologue);
2734       (void) add_stmt_cost (target_cost_data,
2735                             peel_iters_epilogue * scalar_single_iter_cost,
2736                             scalar_stmt, NULL, 0, vect_epilogue);
2737     }
2738   else
2739     {
2740       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2741       stmt_info_for_cost *si;
2742       int j;
2743       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2744
2745       prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
2746       epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
2747       peel_iters_prologue = npeel;
2748
2749       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2750                                           &peel_iters_epilogue,
2751                                           scalar_single_iter_cost,
2752                                           &prologue_cost_vec,
2753                                           &epilogue_cost_vec);
2754
2755       FOR_EACH_VEC_ELT (stmt_info_for_cost, prologue_cost_vec, j, si)
2756         {
2757           struct _stmt_vec_info *stmt_info
2758             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2759           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2760                                 si->misalign, vect_prologue);
2761         }
2762
2763       FOR_EACH_VEC_ELT (stmt_info_for_cost, epilogue_cost_vec, j, si)
2764         {
2765           struct _stmt_vec_info *stmt_info
2766             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2767           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2768                                 si->misalign, vect_epilogue);
2769         }
2770
2771       VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
2772       VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
2773     }
2774
2775   /* FORNOW: The scalar outside cost is incremented in one of the
2776      following ways:
2777
2778      1. The vectorizer checks for alignment and aliasing and generates
2779      a condition that allows dynamic vectorization.  A cost model
2780      check is ANDED with the versioning condition.  Hence scalar code
2781      path now has the added cost of the versioning check.
2782
2783        if (cost > th & versioning_check)
2784          jmp to vector code
2785
2786      Hence run-time scalar is incremented by not-taken branch cost.
2787
2788      2. The vectorizer then checks if a prologue is required.  If the
2789      cost model check was not done before during versioning, it has to
2790      be done before the prologue check.
2791
2792        if (cost <= th)
2793          prologue = scalar_iters
2794        if (prologue == 0)
2795          jmp to vector code
2796        else
2797          execute prologue
2798        if (prologue == num_iters)
2799          go to exit
2800
2801      Hence the run-time scalar cost is incremented by a taken branch,
2802      plus a not-taken branch, plus a taken branch cost.
2803
2804      3. The vectorizer then checks if an epilogue is required.  If the
2805      cost model check was not done before during prologue check, it
2806      has to be done with the epilogue check.
2807
2808        if (prologue == 0)
2809          jmp to vector code
2810        else
2811          execute prologue
2812        if (prologue == num_iters)
2813          go to exit
2814        vector code:
2815          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2816            jmp to epilogue
2817
2818      Hence the run-time scalar cost should be incremented by 2 taken
2819      branches.
2820
2821      TODO: The back end may reorder the BBS's differently and reverse
2822      conditions/branch directions.  Change the estimates below to
2823      something more reasonable.  */
2824
2825   /* If the number of iterations is known and we do not do versioning, we can
2826      decide whether to vectorize at compile time.  Hence the scalar version
2827      do not carry cost model guard costs.  */
2828   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2829       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2830       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2831     {
2832       /* Cost model check occurs at versioning.  */
2833       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2834           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2835         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2836       else
2837         {
2838           /* Cost model check occurs at prologue generation.  */
2839           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2840             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2841               + vect_get_stmt_cost (cond_branch_not_taken);
2842           /* Cost model check occurs at epilogue generation.  */
2843           else
2844             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2845         }
2846     }
2847
2848   /* Complete the target-specific cost calculations.  */
2849   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2850                &vec_inside_cost, &vec_epilogue_cost);
2851
2852   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2853
2854   /* Calculate number of iterations required to make the vector version
2855      profitable, relative to the loop bodies only.  The following condition
2856      must hold true:
2857      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2858      where
2859      SIC = scalar iteration cost, VIC = vector iteration cost,
2860      VOC = vector outside cost, VF = vectorization factor,
2861      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2862      SOC = scalar outside cost for run time cost model check.  */
2863
2864   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2865     {
2866       if (vec_outside_cost <= 0)
2867         min_profitable_iters = 1;
2868       else
2869         {
2870           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2871                                   - vec_inside_cost * peel_iters_prologue
2872                                   - vec_inside_cost * peel_iters_epilogue)
2873                                  / ((scalar_single_iter_cost * vf)
2874                                     - vec_inside_cost);
2875
2876           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2877               <= (((int) vec_inside_cost * min_profitable_iters)
2878                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2879             min_profitable_iters++;
2880         }
2881     }
2882   /* vector version will never be profitable.  */
2883   else
2884     {
2885       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2886         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2887                          "cost model: the vector iteration cost = %d "
2888                          "divided by the scalar iteration cost = %d "
2889                          "is greater or equal to the vectorization factor = %d.",
2890                          vec_inside_cost, scalar_single_iter_cost, vf);
2891       *ret_min_profitable_niters = -1;
2892       *ret_min_profitable_estimate = -1;
2893       return;
2894     }
2895
2896   if (dump_kind_p (MSG_NOTE))
2897     {
2898       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2899       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
2900                    vec_inside_cost);
2901       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
2902                    vec_prologue_cost);
2903       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
2904                    vec_epilogue_cost);
2905       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
2906                    scalar_single_iter_cost);
2907       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
2908                    scalar_outside_cost);
2909       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
2910                    vec_outside_cost);
2911       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
2912                    peel_iters_prologue);
2913       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
2914                    peel_iters_epilogue);
2915       dump_printf (MSG_NOTE,
2916                    "  Calculated minimum iters for profitability: %d\n",
2917                    min_profitable_iters);
2918     }
2919
2920   min_profitable_iters =
2921         min_profitable_iters < vf ? vf : min_profitable_iters;
2922
2923   /* Because the condition we create is:
2924      if (niters <= min_profitable_iters)
2925        then skip the vectorized loop.  */
2926   min_profitable_iters--;
2927
2928   if (dump_kind_p (MSG_NOTE))
2929     dump_printf_loc (MSG_NOTE, vect_location,
2930                      "  Runtime profitability threshold = %d\n", min_profitable_iters);
2931
2932   *ret_min_profitable_niters = min_profitable_iters;
2933
2934   /* Calculate number of iterations required to make the vector version
2935      profitable, relative to the loop bodies only.
2936
2937      Non-vectorized variant is SIC * niters and it must win over vector
2938      variant on the expected loop trip count.  The following condition must hold true:
2939      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
2940
2941   if (vec_outside_cost <= 0)
2942     min_profitable_estimate = 1;
2943   else
2944     {
2945       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2946                                  - vec_inside_cost * peel_iters_prologue
2947                                  - vec_inside_cost * peel_iters_epilogue)
2948                                  / ((scalar_single_iter_cost * vf)
2949                                    - vec_inside_cost);
2950     }
2951   min_profitable_estimate --;
2952   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
2953   if (dump_kind_p (MSG_NOTE))
2954     dump_printf_loc (MSG_NOTE, vect_location,
2955                      "  Static estimate profitability threshold = %d\n",
2956                       min_profitable_iters);
2957
2958   *ret_min_profitable_estimate = min_profitable_estimate;
2959 }
2960
2961
2962 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2963    functions. Design better to avoid maintenance issues.  */
2964
2965 /* Function vect_model_reduction_cost.
2966
2967    Models cost for a reduction operation, including the vector ops
2968    generated within the strip-mine loop, the initial definition before
2969    the loop, and the epilogue code that must be generated.  */
2970
2971 static bool
2972 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2973                            int ncopies)
2974 {
2975   int prologue_cost = 0, epilogue_cost = 0;
2976   enum tree_code code;
2977   optab optab;
2978   tree vectype;
2979   gimple stmt, orig_stmt;
2980   tree reduction_op;
2981   enum machine_mode mode;
2982   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2983   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2984   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2985
2986   /* Cost of reduction op inside loop.  */
2987   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
2988                                         stmt_info, 0, vect_body);
2989   stmt = STMT_VINFO_STMT (stmt_info);
2990
2991   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2992     {
2993     case GIMPLE_SINGLE_RHS:
2994       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2995       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2996       break;
2997     case GIMPLE_UNARY_RHS:
2998       reduction_op = gimple_assign_rhs1 (stmt);
2999       break;
3000     case GIMPLE_BINARY_RHS:
3001       reduction_op = gimple_assign_rhs2 (stmt);
3002       break;
3003     case GIMPLE_TERNARY_RHS:
3004       reduction_op = gimple_assign_rhs3 (stmt);
3005       break;
3006     default:
3007       gcc_unreachable ();
3008     }
3009
3010   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3011   if (!vectype)
3012     {
3013       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3014         {
3015           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3016                            "unsupported data-type ");
3017           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3018                              TREE_TYPE (reduction_op));
3019         }
3020       return false;
3021    }
3022
3023   mode = TYPE_MODE (vectype);
3024   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3025
3026   if (!orig_stmt)
3027     orig_stmt = STMT_VINFO_STMT (stmt_info);
3028
3029   code = gimple_assign_rhs_code (orig_stmt);
3030
3031   /* Add in cost for initial definition.  */
3032   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3033                                   stmt_info, 0, vect_prologue);
3034
3035   /* Determine cost of epilogue code.
3036
3037      We have a reduction operator that will reduce the vector in one statement.
3038      Also requires scalar extract.  */
3039
3040   if (!nested_in_vect_loop_p (loop, orig_stmt))
3041     {
3042       if (reduc_code != ERROR_MARK)
3043         {
3044           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3045                                           stmt_info, 0, vect_epilogue);
3046           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3047                                           stmt_info, 0, vect_epilogue);
3048         }
3049       else
3050         {
3051           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3052           tree bitsize =
3053             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3054           int element_bitsize = tree_low_cst (bitsize, 1);
3055           int nelements = vec_size_in_bits / element_bitsize;
3056
3057           optab = optab_for_tree_code (code, vectype, optab_default);
3058
3059           /* We have a whole vector shift available.  */
3060           if (VECTOR_MODE_P (mode)
3061               && optab_handler (optab, mode) != CODE_FOR_nothing
3062               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3063             {
3064               /* Final reduction via vector shifts and the reduction operator.
3065                  Also requires scalar extract.  */
3066               epilogue_cost += add_stmt_cost (target_cost_data,
3067                                               exact_log2 (nelements) * 2,
3068                                               vector_stmt, stmt_info, 0,
3069                                               vect_epilogue);
3070               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3071                                               vec_to_scalar, stmt_info, 0,
3072                                               vect_epilogue);
3073             }
3074           else
3075             /* Use extracts and reduction op for final reduction.  For N
3076                elements, we have N extracts and N-1 reduction ops.  */
3077             epilogue_cost += add_stmt_cost (target_cost_data,
3078                                             nelements + nelements - 1,
3079                                             vector_stmt, stmt_info, 0,
3080                                             vect_epilogue);
3081         }
3082     }
3083
3084   if (dump_kind_p (MSG_NOTE))
3085     dump_printf (MSG_NOTE,
3086                  "vect_model_reduction_cost: inside_cost = %d, "
3087                  "prologue_cost = %d, epilogue_cost = %d .", inside_cost,
3088                  prologue_cost, epilogue_cost);
3089
3090   return true;
3091 }
3092
3093
3094 /* Function vect_model_induction_cost.
3095
3096    Models cost for induction operations.  */
3097
3098 static void
3099 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3100 {
3101   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3102   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3103   unsigned inside_cost, prologue_cost;
3104
3105   /* loop cost for vec_loop.  */
3106   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3107                                stmt_info, 0, vect_body);
3108
3109   /* prologue cost for vec_init and vec_step.  */
3110   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3111                                  stmt_info, 0, vect_prologue);
3112
3113   if (dump_kind_p (MSG_NOTE))
3114     dump_printf_loc (MSG_NOTE, vect_location,
3115                      "vect_model_induction_cost: inside_cost = %d, "
3116                      "prologue_cost = %d .", inside_cost, prologue_cost);
3117 }
3118
3119
3120 /* Function get_initial_def_for_induction
3121
3122    Input:
3123    STMT - a stmt that performs an induction operation in the loop.
3124    IV_PHI - the initial value of the induction variable
3125
3126    Output:
3127    Return a vector variable, initialized with the first VF values of
3128    the induction variable.  E.g., for an iv with IV_PHI='X' and
3129    evolution S, for a vector of 4 units, we want to return:
3130    [X, X + S, X + 2*S, X + 3*S].  */
3131
3132 static tree
3133 get_initial_def_for_induction (gimple iv_phi)
3134 {
3135   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3136   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3137   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3138   tree scalar_type;
3139   tree vectype;
3140   int nunits;
3141   edge pe = loop_preheader_edge (loop);
3142   struct loop *iv_loop;
3143   basic_block new_bb;
3144   tree vec, vec_init, vec_step, t;
3145   tree access_fn;
3146   tree new_var;
3147   tree new_name;
3148   gimple init_stmt, induction_phi, new_stmt;
3149   tree induc_def, vec_def, vec_dest;
3150   tree init_expr, step_expr;
3151   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3152   int i;
3153   bool ok;
3154   int ncopies;
3155   tree expr;
3156   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3157   bool nested_in_vect_loop = false;
3158   gimple_seq stmts = NULL;
3159   imm_use_iterator imm_iter;
3160   use_operand_p use_p;
3161   gimple exit_phi;
3162   edge latch_e;
3163   tree loop_arg;
3164   gimple_stmt_iterator si;
3165   basic_block bb = gimple_bb (iv_phi);
3166   tree stepvectype;
3167   tree resvectype;
3168
3169   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3170   if (nested_in_vect_loop_p (loop, iv_phi))
3171     {
3172       nested_in_vect_loop = true;
3173       iv_loop = loop->inner;
3174     }
3175   else
3176     iv_loop = loop;
3177   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3178
3179   latch_e = loop_latch_edge (iv_loop);
3180   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3181
3182   access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
3183   gcc_assert (access_fn);
3184   STRIP_NOPS (access_fn);
3185   ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
3186                                     &init_expr, &step_expr);
3187   gcc_assert (ok);
3188   pe = loop_preheader_edge (iv_loop);
3189
3190   scalar_type = TREE_TYPE (init_expr);
3191   vectype = get_vectype_for_scalar_type (scalar_type);
3192   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3193   gcc_assert (vectype);
3194   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3195   ncopies = vf / nunits;
3196
3197   gcc_assert (phi_info);
3198   gcc_assert (ncopies >= 1);
3199
3200   /* Find the first insertion point in the BB.  */
3201   si = gsi_after_labels (bb);
3202
3203   /* Create the vector that holds the initial_value of the induction.  */
3204   if (nested_in_vect_loop)
3205     {
3206       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3207          been created during vectorization of previous stmts.  We obtain it
3208          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3209       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3210                                            loop_preheader_edge (iv_loop));
3211       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
3212     }
3213   else
3214     {
3215       VEC(constructor_elt,gc) *v;
3216
3217       /* iv_loop is the loop to be vectorized. Create:
3218          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3219       new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
3220       new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
3221       if (stmts)
3222         {
3223           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3224           gcc_assert (!new_bb);
3225         }
3226
3227       v = VEC_alloc (constructor_elt, gc, nunits);
3228       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3229       for (i = 1; i < nunits; i++)
3230         {
3231           /* Create: new_name_i = new_name + step_expr  */
3232           enum tree_code code = POINTER_TYPE_P (scalar_type)
3233                                 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3234           init_stmt = gimple_build_assign_with_ops (code, new_var,
3235                                                     new_name, step_expr);
3236           new_name = make_ssa_name (new_var, init_stmt);
3237           gimple_assign_set_lhs (init_stmt, new_name);
3238
3239           new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3240           gcc_assert (!new_bb);
3241
3242           if (dump_kind_p (MSG_NOTE))
3243             {
3244               dump_printf_loc (MSG_NOTE, vect_location,
3245                                "created new init_stmt: ");
3246               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3247             }
3248           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3249         }
3250       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3251       vec = build_constructor (vectype, v);
3252       vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
3253     }
3254
3255
3256   /* Create the vector that holds the step of the induction.  */
3257   if (nested_in_vect_loop)
3258     /* iv_loop is nested in the loop to be vectorized. Generate:
3259        vec_step = [S, S, S, S]  */
3260     new_name = step_expr;
3261   else
3262     {
3263       /* iv_loop is the loop to be vectorized. Generate:
3264           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3265       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3266       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3267                               expr, step_expr);
3268     }
3269
3270   t = unshare_expr (new_name);
3271   gcc_assert (CONSTANT_CLASS_P (new_name));
3272   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3273   gcc_assert (stepvectype);
3274   vec = build_vector_from_val (stepvectype, t);
3275   vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3276
3277
3278   /* Create the following def-use cycle:
3279      loop prolog:
3280          vec_init = ...
3281          vec_step = ...
3282      loop:
3283          vec_iv = PHI <vec_init, vec_loop>
3284          ...
3285          STMT
3286          ...
3287          vec_loop = vec_iv + vec_step;  */
3288
3289   /* Create the induction-phi that defines the induction-operand.  */
3290   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3291   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3292   set_vinfo_for_stmt (induction_phi,
3293                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3294   induc_def = PHI_RESULT (induction_phi);
3295
3296   /* Create the iv update inside the loop  */
3297   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3298                                            induc_def, vec_step);
3299   vec_def = make_ssa_name (vec_dest, new_stmt);
3300   gimple_assign_set_lhs (new_stmt, vec_def);
3301   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3302   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3303                                                    NULL));
3304
3305   /* Set the arguments of the phi node:  */
3306   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3307   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3308                UNKNOWN_LOCATION);
3309
3310
3311   /* In case that vectorization factor (VF) is bigger than the number
3312      of elements that we can fit in a vectype (nunits), we have to generate
3313      more than one vector stmt - i.e - we need to "unroll" the
3314      vector stmt by a factor VF/nunits.  For more details see documentation
3315      in vectorizable_operation.  */
3316
3317   if (ncopies > 1)
3318     {
3319       stmt_vec_info prev_stmt_vinfo;
3320       /* FORNOW. This restriction should be relaxed.  */
3321       gcc_assert (!nested_in_vect_loop);
3322
3323       /* Create the vector that holds the step of the induction.  */
3324       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3325       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3326                               expr, step_expr);
3327       t = unshare_expr (new_name);
3328       gcc_assert (CONSTANT_CLASS_P (new_name));
3329       vec = build_vector_from_val (stepvectype, t);
3330       vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
3331
3332       vec_def = induc_def;
3333       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3334       for (i = 1; i < ncopies; i++)
3335         {
3336           /* vec_i = vec_prev + vec_step  */
3337           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3338                                                    vec_def, vec_step);
3339           vec_def = make_ssa_name (vec_dest, new_stmt);
3340           gimple_assign_set_lhs (new_stmt, vec_def);
3341
3342           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3343           if (!useless_type_conversion_p (resvectype, vectype))
3344             {
3345               new_stmt = gimple_build_assign_with_ops
3346                   (VIEW_CONVERT_EXPR,
3347                    vect_get_new_vect_var (resvectype, vect_simple_var,
3348                                           "vec_iv_"),
3349                    build1 (VIEW_CONVERT_EXPR, resvectype,
3350                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3351               gimple_assign_set_lhs (new_stmt,
3352                                      make_ssa_name
3353                                        (gimple_assign_lhs (new_stmt), new_stmt));
3354               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3355             }
3356           set_vinfo_for_stmt (new_stmt,
3357                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3358           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3359           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3360         }
3361     }
3362
3363   if (nested_in_vect_loop)
3364     {
3365       /* Find the loop-closed exit-phi of the induction, and record
3366          the final vector of induction results:  */
3367       exit_phi = NULL;
3368       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3369         {
3370           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3371             {
3372               exit_phi = USE_STMT (use_p);
3373               break;
3374             }
3375         }
3376       if (exit_phi)
3377         {
3378           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3379           /* FORNOW. Currently not supporting the case that an inner-loop induction
3380              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3381           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3382                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3383
3384           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3385           if (dump_kind_p (MSG_NOTE))
3386             {
3387               dump_printf_loc (MSG_NOTE, vect_location,
3388                                "vector of inductions after inner-loop:");
3389               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3390             }
3391         }
3392     }
3393
3394
3395   if (dump_kind_p (MSG_NOTE))
3396     {
3397       dump_printf_loc (MSG_NOTE, vect_location,
3398                        "transform induction: created def-use cycle: ");
3399       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3400       dump_printf (MSG_NOTE, "\n");
3401       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3402                         SSA_NAME_DEF_STMT (vec_def), 0);
3403     }
3404
3405   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3406   if (!useless_type_conversion_p (resvectype, vectype))
3407     {
3408       new_stmt = gimple_build_assign_with_ops
3409          (VIEW_CONVERT_EXPR,
3410           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3411           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3412       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3413       gimple_assign_set_lhs (new_stmt, induc_def);
3414       si = gsi_start_bb (bb);
3415       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3416       set_vinfo_for_stmt (new_stmt,
3417                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3418       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3419         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3420     }
3421
3422   return induc_def;
3423 }
3424
3425
3426 /* Function get_initial_def_for_reduction
3427
3428    Input:
3429    STMT - a stmt that performs a reduction operation in the loop.
3430    INIT_VAL - the initial value of the reduction variable
3431
3432    Output:
3433    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3434         of the reduction (used for adjusting the epilog - see below).
3435    Return a vector variable, initialized according to the operation that STMT
3436         performs. This vector will be used as the initial value of the
3437         vector of partial results.
3438
3439    Option1 (adjust in epilog): Initialize the vector as follows:
3440      add/bit or/xor:    [0,0,...,0,0]
3441      mult/bit and:      [1,1,...,1,1]
3442      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3443    and when necessary (e.g. add/mult case) let the caller know
3444    that it needs to adjust the result by init_val.
3445
3446    Option2: Initialize the vector as follows:
3447      add/bit or/xor:    [init_val,0,0,...,0]
3448      mult/bit and:      [init_val,1,1,...,1]
3449      min/max/cond_expr: [init_val,init_val,...,init_val]
3450    and no adjustments are needed.
3451
3452    For example, for the following code:
3453
3454    s = init_val;
3455    for (i=0;i<n;i++)
3456      s = s + a[i];
3457
3458    STMT is 's = s + a[i]', and the reduction variable is 's'.
3459    For a vector of 4 units, we want to return either [0,0,0,init_val],
3460    or [0,0,0,0] and let the caller know that it needs to adjust
3461    the result at the end by 'init_val'.
3462
3463    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3464    initialization vector is simpler (same element in all entries), if
3465    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3466
3467    A cost model should help decide between these two schemes.  */
3468
3469 tree
3470 get_initial_def_for_reduction (gimple stmt, tree init_val,
3471                                tree *adjustment_def)
3472 {
3473   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3474   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3475   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3476   tree scalar_type = TREE_TYPE (init_val);
3477   tree vectype = get_vectype_for_scalar_type (scalar_type);
3478   int nunits;
3479   enum tree_code code = gimple_assign_rhs_code (stmt);
3480   tree def_for_init;
3481   tree init_def;
3482   tree *elts;
3483   int i;
3484   bool nested_in_vect_loop = false;
3485   tree init_value;
3486   REAL_VALUE_TYPE real_init_val = dconst0;
3487   int int_init_val = 0;
3488   gimple def_stmt = NULL;
3489
3490   gcc_assert (vectype);
3491   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3492
3493   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3494               || SCALAR_FLOAT_TYPE_P (scalar_type));
3495
3496   if (nested_in_vect_loop_p (loop, stmt))
3497     nested_in_vect_loop = true;
3498   else
3499     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3500
3501   /* In case of double reduction we only create a vector variable to be put
3502      in the reduction phi node.  The actual statement creation is done in
3503      vect_create_epilog_for_reduction.  */
3504   if (adjustment_def && nested_in_vect_loop
3505       && TREE_CODE (init_val) == SSA_NAME
3506       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3507       && gimple_code (def_stmt) == GIMPLE_PHI
3508       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3509       && vinfo_for_stmt (def_stmt)
3510       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3511           == vect_double_reduction_def)
3512     {
3513       *adjustment_def = NULL;
3514       return vect_create_destination_var (init_val, vectype);
3515     }
3516
3517   if (TREE_CONSTANT (init_val))
3518     {
3519       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3520         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3521       else
3522         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3523     }
3524   else
3525     init_value = init_val;
3526
3527   switch (code)
3528     {
3529       case WIDEN_SUM_EXPR:
3530       case DOT_PROD_EXPR:
3531       case PLUS_EXPR:
3532       case MINUS_EXPR:
3533       case BIT_IOR_EXPR:
3534       case BIT_XOR_EXPR:
3535       case MULT_EXPR:
3536       case BIT_AND_EXPR:
3537         /* ADJUSMENT_DEF is NULL when called from
3538            vect_create_epilog_for_reduction to vectorize double reduction.  */
3539         if (adjustment_def)
3540           {
3541             if (nested_in_vect_loop)
3542               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3543                                                               NULL);
3544             else
3545               *adjustment_def = init_val;
3546           }
3547
3548         if (code == MULT_EXPR)
3549           {
3550             real_init_val = dconst1;
3551             int_init_val = 1;
3552           }
3553
3554         if (code == BIT_AND_EXPR)
3555           int_init_val = -1;
3556
3557         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3558           def_for_init = build_real (scalar_type, real_init_val);
3559         else
3560           def_for_init = build_int_cst (scalar_type, int_init_val);
3561
3562         /* Create a vector of '0' or '1' except the first element.  */
3563         elts = XALLOCAVEC (tree, nunits);
3564         for (i = nunits - 2; i >= 0; --i)
3565           elts[i + 1] = def_for_init;
3566
3567         /* Option1: the first element is '0' or '1' as well.  */
3568         if (adjustment_def)
3569           {
3570             elts[0] = def_for_init;
3571             init_def = build_vector (vectype, elts);
3572             break;
3573           }
3574
3575         /* Option2: the first element is INIT_VAL.  */
3576         elts[0] = init_val;
3577         if (TREE_CONSTANT (init_val))
3578           init_def = build_vector (vectype, elts);
3579         else
3580           {
3581             VEC(constructor_elt,gc) *v;
3582             v = VEC_alloc (constructor_elt, gc, nunits);
3583             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3584             for (i = 1; i < nunits; ++i)
3585               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3586             init_def = build_constructor (vectype, v);
3587           }
3588
3589         break;
3590
3591       case MIN_EXPR:
3592       case MAX_EXPR:
3593       case COND_EXPR:
3594         if (adjustment_def)
3595           {
3596             *adjustment_def = NULL_TREE;
3597             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3598             break;
3599           }
3600
3601         init_def = build_vector_from_val (vectype, init_value);
3602         break;
3603
3604       default:
3605         gcc_unreachable ();
3606     }
3607
3608   return init_def;
3609 }
3610
3611
3612 /* Function vect_create_epilog_for_reduction
3613
3614    Create code at the loop-epilog to finalize the result of a reduction
3615    computation.
3616
3617    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of the
3618      vector reduction statements.
3619    STMT is the scalar reduction stmt that is being vectorized.
3620    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3621      number of elements that we can fit in a vectype (nunits).  In this case
3622      we have to generate more than one vector stmt - i.e - we need to "unroll"
3623      the vector stmt by a factor VF/nunits.  For more details see documentation
3624      in vectorizable_operation.
3625    REDUC_CODE is the tree-code for the epilog reduction.
3626    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3627      computation.
3628    REDUC_INDEX is the index of the operand in the right hand side of the
3629      statement that is defined by REDUCTION_PHI.
3630    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3631    SLP_NODE is an SLP node containing a group of reduction statements. The
3632      first one in this group is STMT.
3633
3634    This function:
3635    1. Creates the reduction def-use cycles: sets the arguments for
3636       REDUCTION_PHIS:
3637       The loop-entry argument is the vectorized initial-value of the reduction.
3638       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3639       sums.
3640    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3641       by applying the operation specified by REDUC_CODE if available, or by
3642       other means (whole-vector shifts or a scalar loop).
3643       The function also creates a new phi node at the loop exit to preserve
3644       loop-closed form, as illustrated below.
3645
3646      The flow at the entry to this function:
3647
3648         loop:
3649           vec_def = phi <null, null>            # REDUCTION_PHI
3650           VECT_DEF = vector_stmt                # vectorized form of STMT
3651           s_loop = scalar_stmt                  # (scalar) STMT
3652         loop_exit:
3653           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3654           use <s_out0>
3655           use <s_out0>
3656
3657      The above is transformed by this function into:
3658
3659         loop:
3660           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3661           VECT_DEF = vector_stmt                # vectorized form of STMT
3662           s_loop = scalar_stmt                  # (scalar) STMT
3663         loop_exit:
3664           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3665           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3666           v_out2 = reduce <v_out1>
3667           s_out3 = extract_field <v_out2, 0>
3668           s_out4 = adjust_result <s_out3>
3669           use <s_out4>
3670           use <s_out4>
3671 */
3672
3673 static void
3674 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3675                                   int ncopies, enum tree_code reduc_code,
3676                                   VEC (gimple, heap) *reduction_phis,
3677                                   int reduc_index, bool double_reduc,
3678                                   slp_tree slp_node)
3679 {
3680   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3681   stmt_vec_info prev_phi_info;
3682   tree vectype;
3683   enum machine_mode mode;
3684   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3685   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3686   basic_block exit_bb;
3687   tree scalar_dest;
3688   tree scalar_type;
3689   gimple new_phi = NULL, phi;
3690   gimple_stmt_iterator exit_gsi;
3691   tree vec_dest;
3692   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3693   gimple epilog_stmt = NULL;
3694   enum tree_code code = gimple_assign_rhs_code (stmt);
3695   gimple exit_phi;
3696   tree bitsize, bitpos;
3697   tree adjustment_def = NULL;
3698   tree vec_initial_def = NULL;
3699   tree reduction_op, expr, def;
3700   tree orig_name, scalar_result;
3701   imm_use_iterator imm_iter, phi_imm_iter;
3702   use_operand_p use_p, phi_use_p;
3703   bool extract_scalar_result = false;
3704   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3705   bool nested_in_vect_loop = false;
3706   VEC (gimple, heap) *new_phis = NULL;
3707   VEC (gimple, heap) *inner_phis = NULL;
3708   enum vect_def_type dt = vect_unknown_def_type;
3709   int j, i;
3710   VEC (tree, heap) *scalar_results = NULL;
3711   unsigned int group_size = 1, k, ratio;
3712   VEC (tree, heap) *vec_initial_defs = NULL;
3713   VEC (gimple, heap) *phis;
3714   bool slp_reduc = false;
3715   tree new_phi_result;
3716   gimple inner_phi = NULL;
3717
3718   if (slp_node)
3719     group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3720
3721   if (nested_in_vect_loop_p (loop, stmt))
3722     {
3723       outer_loop = loop;
3724       loop = loop->inner;
3725       nested_in_vect_loop = true;
3726       gcc_assert (!slp_node);
3727     }
3728
3729   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3730     {
3731     case GIMPLE_SINGLE_RHS:
3732       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3733                   == ternary_op);
3734       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3735       break;
3736     case GIMPLE_UNARY_RHS:
3737       reduction_op = gimple_assign_rhs1 (stmt);
3738       break;
3739     case GIMPLE_BINARY_RHS:
3740       reduction_op = reduc_index ?
3741                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3742       break;
3743     case GIMPLE_TERNARY_RHS:
3744       reduction_op = gimple_op (stmt, reduc_index + 1);
3745       break;
3746     default:
3747       gcc_unreachable ();
3748     }
3749
3750   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3751   gcc_assert (vectype);
3752   mode = TYPE_MODE (vectype);
3753
3754   /* 1. Create the reduction def-use cycle:
3755      Set the arguments of REDUCTION_PHIS, i.e., transform
3756
3757         loop:
3758           vec_def = phi <null, null>            # REDUCTION_PHI
3759           VECT_DEF = vector_stmt                # vectorized form of STMT
3760           ...
3761
3762      into:
3763
3764         loop:
3765           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3766           VECT_DEF = vector_stmt                # vectorized form of STMT
3767           ...
3768
3769      (in case of SLP, do it for all the phis). */
3770
3771   /* Get the loop-entry arguments.  */
3772   if (slp_node)
3773     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3774                        NULL, slp_node, reduc_index);
3775   else
3776     {
3777       vec_initial_defs = VEC_alloc (tree, heap, 1);
3778      /* For the case of reduction, vect_get_vec_def_for_operand returns
3779         the scalar def before the loop, that defines the initial value
3780         of the reduction variable.  */
3781       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3782                                                       &adjustment_def);
3783       VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3784     }
3785
3786   /* Set phi nodes arguments.  */
3787   FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3788     {
3789       tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3790       tree def = VEC_index (tree, vect_defs, i);
3791       for (j = 0; j < ncopies; j++)
3792         {
3793           /* Set the loop-entry arg of the reduction-phi.  */
3794           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3795                        UNKNOWN_LOCATION);
3796
3797           /* Set the loop-latch arg for the reduction-phi.  */
3798           if (j > 0)
3799             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3800
3801           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3802
3803           if (dump_kind_p (MSG_NOTE))
3804             {
3805               dump_printf_loc (MSG_NOTE, vect_location,
3806                                "transform reduction: created def-use cycle: ");
3807               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3808               dump_printf (MSG_NOTE, "\n");
3809               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3810             }
3811
3812           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3813         }
3814     }
3815
3816   VEC_free (tree, heap, vec_initial_defs);
3817
3818   /* 2. Create epilog code.
3819         The reduction epilog code operates across the elements of the vector
3820         of partial results computed by the vectorized loop.
3821         The reduction epilog code consists of:
3822
3823         step 1: compute the scalar result in a vector (v_out2)
3824         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3825         step 3: adjust the scalar result (s_out3) if needed.
3826
3827         Step 1 can be accomplished using one of the following three schemes:
3828           (scheme 1) using reduc_code, if available.
3829           (scheme 2) using whole-vector shifts, if available.
3830           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3831                      combined.
3832
3833           The overall epilog code looks like this:
3834
3835           s_out0 = phi <s_loop>         # original EXIT_PHI
3836           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3837           v_out2 = reduce <v_out1>              # step 1
3838           s_out3 = extract_field <v_out2, 0>    # step 2
3839           s_out4 = adjust_result <s_out3>       # step 3
3840
3841           (step 3 is optional, and steps 1 and 2 may be combined).
3842           Lastly, the uses of s_out0 are replaced by s_out4.  */
3843
3844
3845   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3846          v_out1 = phi <VECT_DEF>
3847          Store them in NEW_PHIS.  */
3848
3849   exit_bb = single_exit (loop)->dest;
3850   prev_phi_info = NULL;
3851   new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3852   FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3853     {
3854       for (j = 0; j < ncopies; j++)
3855         {
3856           tree new_def = copy_ssa_name (def, NULL);
3857           phi = create_phi_node (new_def, exit_bb);
3858           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3859           if (j == 0)
3860             VEC_quick_push (gimple, new_phis, phi);
3861           else
3862             {
3863               def = vect_get_vec_def_for_stmt_copy (dt, def);
3864               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3865             }
3866
3867           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3868           prev_phi_info = vinfo_for_stmt (phi);
3869         }
3870     }
3871
3872   /* The epilogue is created for the outer-loop, i.e., for the loop being
3873      vectorized.  Create exit phis for the outer loop.  */
3874   if (double_reduc)
3875     {
3876       loop = outer_loop;
3877       exit_bb = single_exit (loop)->dest;
3878       inner_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3879       FOR_EACH_VEC_ELT (gimple, new_phis, i, phi)
3880         {
3881           tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3882           gimple outer_phi = create_phi_node (new_result, exit_bb);
3883           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3884                            PHI_RESULT (phi));
3885           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3886                                                             loop_vinfo, NULL));
3887           VEC_quick_push (gimple, inner_phis, phi);
3888           VEC_replace (gimple, new_phis, i, outer_phi);
3889           prev_phi_info = vinfo_for_stmt (outer_phi);
3890           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3891             {
3892               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3893               new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3894               outer_phi = create_phi_node (new_result, exit_bb);
3895               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3896                                PHI_RESULT (phi));
3897               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3898                                                         loop_vinfo, NULL));
3899               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3900               prev_phi_info = vinfo_for_stmt (outer_phi);
3901             }
3902         }
3903     }
3904
3905   exit_gsi = gsi_after_labels (exit_bb);
3906
3907   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3908          (i.e. when reduc_code is not available) and in the final adjustment
3909          code (if needed).  Also get the original scalar reduction variable as
3910          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3911          represents a reduction pattern), the tree-code and scalar-def are
3912          taken from the original stmt that the pattern-stmt (STMT) replaces.
3913          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3914          are taken from STMT.  */
3915
3916   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3917   if (!orig_stmt)
3918     {
3919       /* Regular reduction  */
3920       orig_stmt = stmt;
3921     }
3922   else
3923     {
3924       /* Reduction pattern  */
3925       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3926       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3927       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3928     }
3929
3930   code = gimple_assign_rhs_code (orig_stmt);
3931   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3932      partial results are added and not subtracted.  */
3933   if (code == MINUS_EXPR)
3934     code = PLUS_EXPR;
3935
3936   scalar_dest = gimple_assign_lhs (orig_stmt);
3937   scalar_type = TREE_TYPE (scalar_dest);
3938   scalar_results = VEC_alloc (tree, heap, group_size);
3939   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3940   bitsize = TYPE_SIZE (scalar_type);
3941
3942   /* In case this is a reduction in an inner-loop while vectorizing an outer
3943      loop - we don't need to extract a single scalar result at the end of the
3944      inner-loop (unless it is double reduction, i.e., the use of reduction is
3945      outside the outer-loop).  The final vector of partial results will be used
3946      in the vectorized outer-loop, or reduced to a scalar result at the end of
3947      the outer-loop.  */
3948   if (nested_in_vect_loop && !double_reduc)
3949     goto vect_finalize_reduction;
3950
3951   /* SLP reduction without reduction chain, e.g.,
3952      # a1 = phi <a2, a0>
3953      # b1 = phi <b2, b0>
3954      a2 = operation (a1)
3955      b2 = operation (b1)  */
3956   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3957
3958   /* In case of reduction chain, e.g.,
3959      # a1 = phi <a3, a0>
3960      a2 = operation (a1)
3961      a3 = operation (a2),
3962
3963      we may end up with more than one vector result.  Here we reduce them to
3964      one vector.  */
3965   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3966     {
3967       tree first_vect = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3968       tree tmp;
3969       gimple new_vec_stmt = NULL;
3970
3971       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3972       for (k = 1; k < VEC_length (gimple, new_phis); k++)
3973         {
3974           gimple next_phi = VEC_index (gimple, new_phis, k);
3975           tree second_vect = PHI_RESULT (next_phi);
3976
3977           tmp = build2 (code, vectype,  first_vect, second_vect);
3978           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3979           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3980           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3981           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3982         }
3983
3984       new_phi_result = first_vect;
3985       if (new_vec_stmt)
3986         {
3987           VEC_truncate (gimple, new_phis, 0);
3988           VEC_safe_push (gimple, heap, new_phis, new_vec_stmt);
3989         }
3990     }
3991   else
3992     new_phi_result = PHI_RESULT (VEC_index (gimple, new_phis, 0));
3993
3994   /* 2.3 Create the reduction code, using one of the three schemes described
3995          above. In SLP we simply need to extract all the elements from the
3996          vector (without reducing them), so we use scalar shifts.  */
3997   if (reduc_code != ERROR_MARK && !slp_reduc)
3998     {
3999       tree tmp;
4000
4001       /*** Case 1:  Create:
4002            v_out2 = reduc_expr <v_out1>  */
4003
4004       if (dump_kind_p (MSG_NOTE))
4005         dump_printf_loc (MSG_NOTE, vect_location,
4006                          "Reduce using direct vector reduction.");
4007
4008       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4009       tmp = build1 (reduc_code, vectype, new_phi_result);
4010       epilog_stmt = gimple_build_assign (vec_dest, tmp);
4011       new_temp = make_ssa_name (vec_dest, epilog_stmt);
4012       gimple_assign_set_lhs (epilog_stmt, new_temp);
4013       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4014
4015       extract_scalar_result = true;
4016     }
4017   else
4018     {
4019       enum tree_code shift_code = ERROR_MARK;
4020       bool have_whole_vector_shift = true;
4021       int bit_offset;
4022       int element_bitsize = tree_low_cst (bitsize, 1);
4023       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4024       tree vec_temp;
4025
4026       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4027         shift_code = VEC_RSHIFT_EXPR;
4028       else
4029         have_whole_vector_shift = false;
4030
4031       /* Regardless of whether we have a whole vector shift, if we're
4032          emulating the operation via tree-vect-generic, we don't want
4033          to use it.  Only the first round of the reduction is likely
4034          to still be profitable via emulation.  */
4035       /* ??? It might be better to emit a reduction tree code here, so that
4036          tree-vect-generic can expand the first round via bit tricks.  */
4037       if (!VECTOR_MODE_P (mode))
4038         have_whole_vector_shift = false;
4039       else
4040         {
4041           optab optab = optab_for_tree_code (code, vectype, optab_default);
4042           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4043             have_whole_vector_shift = false;
4044         }
4045
4046       if (have_whole_vector_shift && !slp_reduc)
4047         {
4048           /*** Case 2: Create:
4049              for (offset = VS/2; offset >= element_size; offset/=2)
4050                 {
4051                   Create:  va' = vec_shift <va, offset>
4052                   Create:  va = vop <va, va'>
4053                 }  */
4054
4055           if (dump_kind_p (MSG_NOTE))
4056             dump_printf_loc (MSG_NOTE, vect_location,
4057                              "Reduce using vector shifts");
4058
4059           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4060           new_temp = new_phi_result;
4061           for (bit_offset = vec_size_in_bits/2;
4062                bit_offset >= element_bitsize;
4063                bit_offset /= 2)
4064             {
4065               tree bitpos = size_int (bit_offset);
4066
4067               epilog_stmt = gimple_build_assign_with_ops (shift_code,
4068                                                vec_dest, new_temp, bitpos);
4069               new_name = make_ssa_name (vec_dest, epilog_stmt);
4070               gimple_assign_set_lhs (epilog_stmt, new_name);
4071               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4072
4073               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4074                                                           new_name, new_temp);
4075               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4076               gimple_assign_set_lhs (epilog_stmt, new_temp);
4077               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4078             }
4079
4080           extract_scalar_result = true;
4081         }
4082       else
4083         {
4084           tree rhs;
4085
4086           /*** Case 3: Create:
4087              s = extract_field <v_out2, 0>
4088              for (offset = element_size;
4089                   offset < vector_size;
4090                   offset += element_size;)
4091                {
4092                  Create:  s' = extract_field <v_out2, offset>
4093                  Create:  s = op <s, s'>  // For non SLP cases
4094                }  */
4095
4096           if (dump_kind_p (MSG_NOTE))
4097             dump_printf_loc (MSG_NOTE, vect_location,
4098                              "Reduce using scalar code. ");
4099
4100           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4101           FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
4102             {
4103               if (gimple_code (new_phi) == GIMPLE_PHI)
4104                 vec_temp = PHI_RESULT (new_phi);
4105               else
4106                 vec_temp = gimple_assign_lhs (new_phi);
4107               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4108                             bitsize_zero_node);
4109               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4110               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4111               gimple_assign_set_lhs (epilog_stmt, new_temp);
4112               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4113
4114               /* In SLP we don't need to apply reduction operation, so we just
4115                  collect s' values in SCALAR_RESULTS.  */
4116               if (slp_reduc)
4117                 VEC_safe_push (tree, heap, scalar_results, new_temp);
4118
4119               for (bit_offset = element_bitsize;
4120                    bit_offset < vec_size_in_bits;
4121                    bit_offset += element_bitsize)
4122                 {
4123                   tree bitpos = bitsize_int (bit_offset);
4124                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4125                                      bitsize, bitpos);
4126
4127                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4128                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4129                   gimple_assign_set_lhs (epilog_stmt, new_name);
4130                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4131
4132                   if (slp_reduc)
4133                     {
4134                       /* In SLP we don't need to apply reduction operation, so
4135                          we just collect s' values in SCALAR_RESULTS.  */
4136                       new_temp = new_name;
4137                       VEC_safe_push (tree, heap, scalar_results, new_name);
4138                     }
4139                   else
4140                     {
4141                       epilog_stmt = gimple_build_assign_with_ops (code,
4142                                           new_scalar_dest, new_name, new_temp);
4143                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4144                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4145                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4146                     }
4147                 }
4148             }
4149
4150           /* The only case where we need to reduce scalar results in SLP, is
4151              unrolling.  If the size of SCALAR_RESULTS is greater than
4152              GROUP_SIZE, we reduce them combining elements modulo
4153              GROUP_SIZE.  */
4154           if (slp_reduc)
4155             {
4156               tree res, first_res, new_res;
4157               gimple new_stmt;
4158
4159               /* Reduce multiple scalar results in case of SLP unrolling.  */
4160               for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
4161                    j++)
4162                 {
4163                   first_res = VEC_index (tree, scalar_results, j % group_size);
4164                   new_stmt = gimple_build_assign_with_ops (code,
4165                                               new_scalar_dest, first_res, res);
4166                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4167                   gimple_assign_set_lhs (new_stmt, new_res);
4168                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4169                   VEC_replace (tree, scalar_results, j % group_size, new_res);
4170                 }
4171             }
4172           else
4173             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4174             VEC_safe_push (tree, heap, scalar_results, new_temp);
4175
4176           extract_scalar_result = false;
4177         }
4178     }
4179
4180   /* 2.4  Extract the final scalar result.  Create:
4181           s_out3 = extract_field <v_out2, bitpos>  */
4182
4183   if (extract_scalar_result)
4184     {
4185       tree rhs;
4186
4187       if (dump_kind_p (MSG_NOTE))
4188         dump_printf_loc (MSG_NOTE, vect_location,
4189                          "extract scalar result");
4190
4191       if (BYTES_BIG_ENDIAN)
4192         bitpos = size_binop (MULT_EXPR,
4193                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4194                              TYPE_SIZE (scalar_type));
4195       else
4196         bitpos = bitsize_zero_node;
4197
4198       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4199       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4200       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4201       gimple_assign_set_lhs (epilog_stmt, new_temp);
4202       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4203       VEC_safe_push (tree, heap, scalar_results, new_temp);
4204     }
4205
4206 vect_finalize_reduction:
4207
4208   if (double_reduc)
4209     loop = loop->inner;
4210
4211   /* 2.5 Adjust the final result by the initial value of the reduction
4212          variable. (When such adjustment is not needed, then
4213          'adjustment_def' is zero).  For example, if code is PLUS we create:
4214          new_temp = loop_exit_def + adjustment_def  */
4215
4216   if (adjustment_def)
4217     {
4218       gcc_assert (!slp_reduc);
4219       if (nested_in_vect_loop)
4220         {
4221           new_phi = VEC_index (gimple, new_phis, 0);
4222           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4223           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4224           new_dest = vect_create_destination_var (scalar_dest, vectype);
4225         }
4226       else
4227         {
4228           new_temp = VEC_index (tree, scalar_results, 0);
4229           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4230           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4231           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4232         }
4233
4234       epilog_stmt = gimple_build_assign (new_dest, expr);
4235       new_temp = make_ssa_name (new_dest, epilog_stmt);
4236       gimple_assign_set_lhs (epilog_stmt, new_temp);
4237       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4238       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4239       if (nested_in_vect_loop)
4240         {
4241           set_vinfo_for_stmt (epilog_stmt,
4242                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4243                                                  NULL));
4244           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4245                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4246
4247           if (!double_reduc)
4248             VEC_quick_push (tree, scalar_results, new_temp);
4249           else
4250             VEC_replace (tree, scalar_results, 0, new_temp);
4251         }
4252       else
4253         VEC_replace (tree, scalar_results, 0, new_temp);
4254
4255       VEC_replace (gimple, new_phis, 0, epilog_stmt);
4256     }
4257
4258   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4259           phis with new adjusted scalar results, i.e., replace use <s_out0>
4260           with use <s_out4>.
4261
4262      Transform:
4263         loop_exit:
4264           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4265           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4266           v_out2 = reduce <v_out1>
4267           s_out3 = extract_field <v_out2, 0>
4268           s_out4 = adjust_result <s_out3>
4269           use <s_out0>
4270           use <s_out0>
4271
4272      into:
4273
4274         loop_exit:
4275           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4276           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4277           v_out2 = reduce <v_out1>
4278           s_out3 = extract_field <v_out2, 0>
4279           s_out4 = adjust_result <s_out3>
4280           use <s_out4>
4281           use <s_out4> */
4282
4283
4284   /* In SLP reduction chain we reduce vector results into one vector if
4285      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4286      the last stmt in the reduction chain, since we are looking for the loop
4287      exit phi node.  */
4288   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4289     {
4290       scalar_dest = gimple_assign_lhs (VEC_index (gimple,
4291                                        SLP_TREE_SCALAR_STMTS (slp_node),
4292                                        group_size - 1));
4293       group_size = 1;
4294     }
4295
4296   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4297      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4298      need to match SCALAR_RESULTS with corresponding statements.  The first
4299      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4300      the first vector stmt, etc.
4301      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4302   if (group_size > VEC_length (gimple, new_phis))
4303     {
4304       ratio = group_size / VEC_length (gimple, new_phis);
4305       gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
4306     }
4307   else
4308     ratio = 1;
4309
4310   for (k = 0; k < group_size; k++)
4311     {
4312       if (k % ratio == 0)
4313         {
4314           epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
4315           reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
4316           if (double_reduc)
4317             inner_phi = VEC_index (gimple, inner_phis, k / ratio);
4318         }
4319
4320       if (slp_reduc)
4321         {
4322           gimple current_stmt = VEC_index (gimple,
4323                                        SLP_TREE_SCALAR_STMTS (slp_node), k);
4324
4325           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4326           /* SLP statements can't participate in patterns.  */
4327           gcc_assert (!orig_stmt);
4328           scalar_dest = gimple_assign_lhs (current_stmt);
4329         }
4330
4331       phis = VEC_alloc (gimple, heap, 3);
4332       /* Find the loop-closed-use at the loop exit of the original scalar
4333          result.  (The reduction result is expected to have two immediate uses -
4334          one at the latch block, and one at the loop exit).  */
4335       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4336         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4337           VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4338
4339       /* We expect to have found an exit_phi because of loop-closed-ssa
4340          form.  */
4341       gcc_assert (!VEC_empty (gimple, phis));
4342
4343       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4344         {
4345           if (outer_loop)
4346             {
4347               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4348               gimple vect_phi;
4349
4350               /* FORNOW. Currently not supporting the case that an inner-loop
4351                  reduction is not used in the outer-loop (but only outside the
4352                  outer-loop), unless it is double reduction.  */
4353               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4354                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4355                           || double_reduc);
4356
4357               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4358               if (!double_reduc
4359                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4360                       != vect_double_reduction_def)
4361                 continue;
4362
4363               /* Handle double reduction:
4364
4365                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4366                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4367                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4368                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4369
4370                  At that point the regular reduction (stmt2 and stmt3) is
4371                  already vectorized, as well as the exit phi node, stmt4.
4372                  Here we vectorize the phi node of double reduction, stmt1, and
4373                  update all relevant statements.  */
4374
4375               /* Go through all the uses of s2 to find double reduction phi
4376                  node, i.e., stmt1 above.  */
4377               orig_name = PHI_RESULT (exit_phi);
4378               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4379                 {
4380                   stmt_vec_info use_stmt_vinfo;
4381                   stmt_vec_info new_phi_vinfo;
4382                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4383                   basic_block bb = gimple_bb (use_stmt);
4384                   gimple use;
4385
4386                   /* Check that USE_STMT is really double reduction phi
4387                      node.  */
4388                   if (gimple_code (use_stmt) != GIMPLE_PHI
4389                       || gimple_phi_num_args (use_stmt) != 2
4390                       || bb->loop_father != outer_loop)
4391                     continue;
4392                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4393                   if (!use_stmt_vinfo
4394                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4395                           != vect_double_reduction_def)
4396                     continue;
4397
4398                   /* Create vector phi node for double reduction:
4399                      vs1 = phi <vs0, vs2>
4400                      vs1 was created previously in this function by a call to
4401                        vect_get_vec_def_for_operand and is stored in
4402                        vec_initial_def;
4403                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4404                      vs0 is created here.  */
4405
4406                   /* Create vector phi node.  */
4407                   vect_phi = create_phi_node (vec_initial_def, bb);
4408                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4409                                     loop_vec_info_for_loop (outer_loop), NULL);
4410                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4411
4412                   /* Create vs0 - initial def of the double reduction phi.  */
4413                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4414                                              loop_preheader_edge (outer_loop));
4415                   init_def = get_initial_def_for_reduction (stmt,
4416                                                           preheader_arg, NULL);
4417                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4418                                                     vectype, NULL);
4419
4420                   /* Update phi node arguments with vs0 and vs2.  */
4421                   add_phi_arg (vect_phi, vect_phi_init,
4422                                loop_preheader_edge (outer_loop),
4423                                UNKNOWN_LOCATION);
4424                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4425                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4426                   if (dump_kind_p (MSG_NOTE))
4427                     {
4428                       dump_printf_loc (MSG_NOTE, vect_location,
4429                                        "created double reduction phi node: ");
4430                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4431                     }
4432
4433                   vect_phi_res = PHI_RESULT (vect_phi);
4434
4435                   /* Replace the use, i.e., set the correct vs1 in the regular
4436                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4437                      loop is redundant.  */
4438                   use = reduction_phi;
4439                   for (j = 0; j < ncopies; j++)
4440                     {
4441                       edge pr_edge = loop_preheader_edge (loop);
4442                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4443                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4444                     }
4445                 }
4446             }
4447         }
4448
4449       VEC_free (gimple, heap, phis);
4450       if (nested_in_vect_loop)
4451         {
4452           if (double_reduc)
4453             loop = outer_loop;
4454           else
4455             continue;
4456         }
4457
4458       phis = VEC_alloc (gimple, heap, 3);
4459       /* Find the loop-closed-use at the loop exit of the original scalar
4460          result.  (The reduction result is expected to have two immediate uses,
4461          one at the latch block, and one at the loop exit).  For double
4462          reductions we are looking for exit phis of the outer loop.  */
4463       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4464         {
4465           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4466             VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
4467           else
4468             {
4469               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4470                 {
4471                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4472
4473                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4474                     {
4475                       if (!flow_bb_inside_loop_p (loop,
4476                                              gimple_bb (USE_STMT (phi_use_p))))
4477                         VEC_safe_push (gimple, heap, phis,
4478                                        USE_STMT (phi_use_p));
4479                     }
4480                 }
4481             }
4482         }
4483
4484       FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
4485         {
4486           /* Replace the uses:  */
4487           orig_name = PHI_RESULT (exit_phi);
4488           scalar_result = VEC_index (tree, scalar_results, k);
4489           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4490             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4491               SET_USE (use_p, scalar_result);
4492         }
4493
4494       VEC_free (gimple, heap, phis);
4495     }
4496
4497   VEC_free (tree, heap, scalar_results);
4498   VEC_free (gimple, heap, new_phis);
4499 }
4500
4501
4502 /* Function vectorizable_reduction.
4503
4504    Check if STMT performs a reduction operation that can be vectorized.
4505    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4506    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4507    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4508
4509    This function also handles reduction idioms (patterns) that have been
4510    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4511    of this form:
4512      X = pattern_expr (arg0, arg1, ..., X)
4513    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4514    sequence that had been detected and replaced by the pattern-stmt (STMT).
4515
4516    In some cases of reduction patterns, the type of the reduction variable X is
4517    different than the type of the other arguments of STMT.
4518    In such cases, the vectype that is used when transforming STMT into a vector
4519    stmt is different than the vectype that is used to determine the
4520    vectorization factor, because it consists of a different number of elements
4521    than the actual number of elements that are being operated upon in parallel.
4522
4523    For example, consider an accumulation of shorts into an int accumulator.
4524    On some targets it's possible to vectorize this pattern operating on 8
4525    shorts at a time (hence, the vectype for purposes of determining the
4526    vectorization factor should be V8HI); on the other hand, the vectype that
4527    is used to create the vector form is actually V4SI (the type of the result).
4528
4529    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4530    indicates what is the actual level of parallelism (V8HI in the example), so
4531    that the right vectorization factor would be derived.  This vectype
4532    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4533    be used to create the vectorized stmt.  The right vectype for the vectorized
4534    stmt is obtained from the type of the result X:
4535         get_vectype_for_scalar_type (TREE_TYPE (X))
4536
4537    This means that, contrary to "regular" reductions (or "regular" stmts in
4538    general), the following equation:
4539       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4540    does *NOT* necessarily hold for reduction patterns.  */
4541
4542 bool
4543 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4544                         gimple *vec_stmt, slp_tree slp_node)
4545 {
       /* Overview of the body: validate that STMT is a recognized reduction
          (steps 1-3), check target support for the in-loop operation and for
          the epilog (step 4), and, when VEC_STMT is non-NULL, emit the vector
          reduction phis and statements followed by the epilog code.  With a
          NULL VEC_STMT only the analysis is performed and the cost is
          modeled.  */
4546   tree vec_dest;
4547   tree scalar_dest;
4548   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4549   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4550   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4551   tree vectype_in = NULL_TREE;
4552   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4553   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4554   enum tree_code code, orig_code, epilog_reduc_code;
4555   enum machine_mode vec_mode;
4556   int op_type;
4557   optab optab, reduc_optab;
4558   tree new_temp = NULL_TREE;
4559   tree def;
4560   gimple def_stmt;
4561   enum vect_def_type dt;
4562   gimple new_phi = NULL;
4563   tree scalar_type;
4564   bool is_simple_use;
4565   gimple orig_stmt;
4566   stmt_vec_info orig_stmt_info;
4567   tree expr = NULL_TREE;
4568   int i;
4569   int ncopies;
4570   int epilog_copies;
4571   stmt_vec_info prev_stmt_info, prev_phi_info;
4572   bool single_defuse_cycle = false;
4573   tree reduc_def = NULL_TREE;
4574   gimple new_stmt = NULL;
4575   int j;
4576   tree ops[3];
4577   bool nested_cycle = false, found_nested_cycle_def = false;
4578   gimple reduc_def_stmt = NULL;
4579   /* By default the reduction variable is the statement's last operand.  */
4580   int reduc_index = 2;
4581   bool double_reduc = false, dummy;
4582   basic_block def_bb;
4583   struct loop * def_stmt_loop, *outer_loop = NULL;
4584   tree def_arg;
4585   gimple def_arg_stmt;
4586   VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
4587   VEC (gimple, heap) *phis = NULL;
4588   int vec_num;
4589   tree def0, def1, tem, op0, op1 = NULL_TREE;
4590
4591   /* In case of reduction chain we switch to the first stmt in the chain, but
4592      we don't update STMT_INFO, since only the last stmt is marked as reduction
4593      and has reduction properties.  */
4594   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4595     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4596
       /* When STMT sits in the inner loop of the loop being vectorized, work
          on the inner loop and remember the outer one.  */
4597   if (nested_in_vect_loop_p (loop, stmt))
4598     {
4599       outer_loop = loop;
4600       loop = loop->inner;
4601       nested_cycle = true;
4602     }
4603
4604   /* 1. Is vectorizable reduction?  */
4605   /* Not supportable if the reduction variable is used in the loop, unless
4606      it's a reduction chain.  */
4607   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4608       && !GROUP_FIRST_ELEMENT (stmt_info))
4609     return false;
4610
4611   /* Reductions that are not used even in an enclosing outer-loop,
4612      are expected to be "live" (used out of the loop).  */
4613   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4614       && !STMT_VINFO_LIVE_P (stmt_info))
4615     return false;
4616
4617   /* Make sure it was already recognized as a reduction computation.  */
4618   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4619       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4620     return false;
4621
4622   /* 2. Has this been recognized as a reduction pattern?
4623
4624      Check if STMT represents a pattern that has been recognized
4625      in earlier analysis stages.  For stmts that represent a pattern,
4626      the STMT_VINFO_RELATED_STMT field records the last stmt in
4627      the original sequence that constitutes the pattern.  */
4628
4629   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4630   if (orig_stmt)
4631     {
4632       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4633       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
4634       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4635       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4636     }
4637
4638   /* 3. Check the operands of the operation.  The first operands are defined
4639         inside the loop body. The last operand is the reduction variable,
4640         which is defined by the loop-header-phi.  */
4641
4642   gcc_assert (is_gimple_assign (stmt));
4643
4644   /* Flatten RHS: collect the operands of STMT into OPS[], the operation
          into CODE and its operand count into OP_TYPE.  Unary operations and
          single-rhs trees that are not ternary are rejected.  */
4645   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4646     {
4647     case GIMPLE_SINGLE_RHS:
4648       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4649       if (op_type == ternary_op)
4650         {
4651           tree rhs = gimple_assign_rhs1 (stmt);
4652           ops[0] = TREE_OPERAND (rhs, 0);
4653           ops[1] = TREE_OPERAND (rhs, 1);
4654           ops[2] = TREE_OPERAND (rhs, 2);
4655           code = TREE_CODE (rhs);
4656         }
4657       else
4658         return false;
4659       break;
4660
4661     case GIMPLE_BINARY_RHS:
4662       code = gimple_assign_rhs_code (stmt);
4663       op_type = TREE_CODE_LENGTH (code);
4664       gcc_assert (op_type == binary_op);
4665       ops[0] = gimple_assign_rhs1 (stmt);
4666       ops[1] = gimple_assign_rhs2 (stmt);
4667       break;
4668
4669     case GIMPLE_TERNARY_RHS:
4670       code = gimple_assign_rhs_code (stmt);
4671       op_type = TREE_CODE_LENGTH (code);
4672       gcc_assert (op_type == ternary_op);
4673       ops[0] = gimple_assign_rhs1 (stmt);
4674       ops[1] = gimple_assign_rhs2 (stmt);
4675       ops[2] = gimple_assign_rhs3 (stmt);
4676       break;
4677
4678     case GIMPLE_UNARY_RHS:
4679       return false;
4680
4681     default:
4682       gcc_unreachable ();
4683     }
4684
       /* Conditional reductions are not handled together with SLP.  */
4685   if (code == COND_EXPR && slp_node)
4686     return false;
4687
4688   scalar_dest = gimple_assign_lhs (stmt);
4689   scalar_type = TREE_TYPE (scalar_dest);
4690   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4691       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4692     return false;
4693
4694   /* Do not try to vectorize bit-precision reductions.  */
4695   if ((TYPE_PRECISION (scalar_type)
4696        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4697     return false;
4698
4699   /* All uses but the last are expected to be defined in the loop.
4700      The last use is the reduction variable.  In case of nested cycle this
4701      assumption is not true: we use reduc_index to record the index of the
4702      reduction variable.  */
4703   for (i = 0; i < op_type-1; i++)
4704     {
4705       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4706       if (i == 0 && code == COND_EXPR)
4707         continue;
4708
4709       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4710                                             &def_stmt, &def, &dt, &tem);
           /* The first vectype reported for an operand becomes VECTYPE_IN.  */
4711       if (!vectype_in)
4712         vectype_in = tem;
4713       gcc_assert (is_simple_use);
4714
4715       if (dt != vect_internal_def
4716           && dt != vect_external_def
4717           && dt != vect_constant_def
4718           && dt != vect_induction_def
4719           && !(dt == vect_nested_cycle && nested_cycle))
4720         return false;
4721
4722       if (dt == vect_nested_cycle)
4723         {
4724           found_nested_cycle_def = true;
4725           reduc_def_stmt = def_stmt;
4726           reduc_index = i;
4727         }
4728     }
4729
       /* The loop above leaves I == OP_TYPE - 1, so OPS[I] is the last
          operand of STMT.  */
4730   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4731                                         &def_stmt, &def, &dt, &tem);
4732   if (!vectype_in)
4733     vectype_in = tem;
4734   gcc_assert (is_simple_use);
4735   gcc_assert (dt == vect_reduction_def
4736               || dt == vect_nested_cycle
4737               || ((dt == vect_internal_def || dt == vect_external_def
4738                    || dt == vect_constant_def || dt == vect_induction_def)
4739                    && nested_cycle && found_nested_cycle_def));
4740   if (!found_nested_cycle_def)
4741     reduc_def_stmt = def_stmt;
4742
4743   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4744   if (orig_stmt)
4745     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4746                                                        reduc_def_stmt,
4747                                                        !nested_cycle,
4748                                                        &dummy));
4749   else
4750     {
4751       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4752                                              !nested_cycle, &dummy);
4753       /* We changed STMT to be the first stmt in reduction chain, hence we
4754          check that in this case the first element in the chain is STMT.  */
4755       gcc_assert (stmt == tmp
4756                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4757     }
4758
4759   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4760     return false;
4761
       /* A single copy is used for SLP; otherwise NCOPIES is the vectorization
          factor divided by the number of elements in VECTYPE_IN.  */
4762   if (slp_node || PURE_SLP_STMT (stmt_info))
4763     ncopies = 1;
4764   else
4765     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4766                / TYPE_VECTOR_SUBPARTS (vectype_in));
4767
4768   gcc_assert (ncopies >= 1);
4769
4770   vec_mode = TYPE_MODE (vectype_in);
4771
4772   if (code == COND_EXPR)
4773     {
           /* Analysis-only call (no VEC_STMT): ask vectorizable_condition
              whether the condition can be handled.  */
4774       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4775         {
4776           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4777             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778                              "unsupported condition in reduction");
4779
4780             return false;
4781         }
4782     }
4783   else
4784     {
4785       /* 4. Supportable by target?  */
4786
4787       /* 4.1. check support for the operation in the loop  */
4788       optab = optab_for_tree_code (code, vectype_in, optab_default);
4789       if (!optab)
4790         {
4791           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4792             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4793                              "no optab.");
4794
4795           return false;
4796         }
4797
4798       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4799         {
4800           if (dump_kind_p (MSG_NOTE))
4801             dump_printf (MSG_NOTE, "op not supported by target.");
4802
               /* Without a handler, carry on only when the vector mode is
                  exactly one word wide and the vectorization factor reaches
                  the minimum returned by vect_min_worthwhile_factor.  */
4803           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4804               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4805                   < vect_min_worthwhile_factor (code))
4806             return false;
4807
4808           if (dump_kind_p (MSG_NOTE))
4809             dump_printf (MSG_NOTE, "proceeding using word mode.");
4810         }
4811
4812       /* Worthwhile without SIMD support?  */
4813       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4814           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4815              < vect_min_worthwhile_factor (code))
4816         {
4817           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4818             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819                              "not worthwhile without SIMD support.");
4820
4821           return false;
4822         }
4823     }
4824
4825   /* 4.2. Check support for the epilog operation.
4826
4827           If STMT represents a reduction pattern, then the type of the
4828           reduction variable may be different than the type of the rest
4829           of the arguments.  For example, consider the case of accumulation
4830           of shorts into an int accumulator; The original code:
4831                         S1: int_a = (int) short_a;
4832           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4833
4834           was replaced with:
4835                         STMT: int_acc = widen_sum <short_a, int_acc>
4836
4837           This means that:
4838           1. The tree-code that is used to create the vector operation in the
4839              epilog code (that reduces the partial results) is not the
4840              tree-code of STMT, but is rather the tree-code of the original
4841              stmt from the pattern that STMT is replacing.  I.e, in the example
4842              above we want to use 'widen_sum' in the loop, but 'plus' in the
4843              epilog.
4844           2. The type (mode) we use to check available target support
4845              for the vector operation to be created in the *epilog*, is
4846              determined by the type of the reduction variable (in the example
4847              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
4848              However the type (mode) we use to check available target support
4849              for the vector operation to be created *inside the loop*, is
4850              determined by the type of the other arguments to STMT (in the
4851              example we'd check this: optab_handler (widen_sum_optab,
4852              vect_short_mode)).
4853
4854           This is contrary to "regular" reductions, in which the types of all
4855           the arguments are the same as the type of the reduction variable.
4856           For "regular" reductions we can therefore use the same vector type
4857           (and also the same tree-code) when generating the epilog code and
4858           when generating the code inside the loop.  */
4859
4860   if (orig_stmt)
4861     {
4862       /* This is a reduction pattern: get the vectype from the type of the
4863          reduction variable, and get the tree-code from orig_stmt.  */
4864       orig_code = gimple_assign_rhs_code (orig_stmt);
4865       gcc_assert (vectype_out);
4866       vec_mode = TYPE_MODE (vectype_out);
4867     }
4868   else
4869     {
4870       /* Regular reduction: use the same vectype and tree-code as used for
4871          the vector code inside the loop can be used for the epilog code. */
4872       orig_code = code;
4873     }
4874
       /* In a nested cycle, look at the value the reduction phi receives on
          the preheader edge of its loop: if it is itself defined by a phi in
          OUTER_LOOP marked vect_double_reduction_def, this is a double
          reduction.  */
4875   if (nested_cycle)
4876     {
4877       def_bb = gimple_bb (reduc_def_stmt);
4878       def_stmt_loop = def_bb->loop_father;
4879       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4880                                        loop_preheader_edge (def_stmt_loop));
4881       if (TREE_CODE (def_arg) == SSA_NAME
4882           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4883           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4884           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4885           && vinfo_for_stmt (def_arg_stmt)
4886           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4887               == vect_double_reduction_def)
4888         double_reduc = true;
4889     }
4890
       /* EPILOG_REDUC_CODE stays ERROR_MARK when no optab or no target handler
          exists for the reduction code; that alone does not reject the
          reduction here.  */
4891   epilog_reduc_code = ERROR_MARK;
4892   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4893     {
4894       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4895                                          optab_default);
4896       if (!reduc_optab)
4897         {
4898           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4899             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4900                              "no optab for reduction.");
4901
4902           epilog_reduc_code = ERROR_MARK;
4903         }
4904
4905       if (reduc_optab
4906           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4907         {
4908           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4909             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4910                              "reduc op not supported by target.");
4911
4912           epilog_reduc_code = ERROR_MARK;
4913         }
4914     }
4915   else
4916     {
           /* A missing reduction code is tolerated only for a nested cycle
              that is not a double reduction.  */
4917       if (!nested_cycle || double_reduc)
4918         {
4919           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4920             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921                              "no reduc code for scalar code.");
4922
4923           return false;
4924         }
4925     }
4926
4927   if (double_reduc && ncopies > 1)
4928     {
4929       if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4930         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4931                          "multiple types in double reduction");
4932
4933       return false;
4934     }
4935
4936   /* In case of widening multiplication by a constant, we update the type
4937      of the constant to be the type of the other operand.  We check that the
4938      constant fits the type in the pattern recognition pass.  */
4939   if (code == DOT_PROD_EXPR
4940       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4941     {
4942       if (TREE_CODE (ops[0]) == INTEGER_CST)
4943         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4944       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4945         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4946       else
4947         {
4948           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4949             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4950                              "invalid types in dot-prod");
4951
4952           return false;
4953         }
4954     }
4955
4956   if (!vec_stmt) /* transformation not required.  */
4957     {
4958       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4959         return false;
4960       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4961       return true;
4962     }
4963
4964   /** Transform.  **/
4965
4966   if (dump_kind_p (MSG_NOTE))
4967     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.");
4968
4969   /* FORNOW: Multiple types are not supported for condition.  */
4970   if (code == COND_EXPR)
4971     gcc_assert (ncopies == 1);
4972
4973   /* Create the destination vector  */
4974   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4975
4976   /* In case the vectorization factor (VF) is bigger than the number
4977      of elements that we can fit in a vectype (nunits), we have to generate
4978      more than one vector stmt - i.e - we need to "unroll" the
4979      vector stmt by a factor VF/nunits.  For more details see documentation
4980      in vectorizable_operation.  */
4981
4982   /* If the reduction is used in an outer loop we need to generate
4983      VF intermediate results, like so (e.g. for ncopies=2):
4984         r0 = phi (init, r0)
4985         r1 = phi (init, r1)
4986         r0 = x0 + r0;
4987         r1 = x1 + r1;
4988     (i.e. we generate VF results in 2 registers).
4989     In this case we have a separate def-use cycle for each copy, and therefore
4990     for each copy we get the vector def for the reduction variable from the
4991     respective phi node created for this copy.
4992
4993     Otherwise (the reduction is unused in the loop nest), we can combine
4994     together intermediate results, like so (e.g. for ncopies=2):
4995         r = phi (init, r)
4996         r = x0 + r;
4997         r = x1 + r;
4998    (i.e. we generate VF/2 results in a single register).
4999    In this case for each copy we get the vector def for the reduction variable
5000    from the vectorized reduction operation generated in the previous iteration.
5001   */
5002
5003   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5004     {
5005       single_defuse_cycle = true;
5006       epilog_copies = 1;
5007     }
5008   else
5009     epilog_copies = ncopies;
5010
5011   prev_stmt_info = NULL;
5012   prev_phi_info = NULL;
5013   if (slp_node)
5014     {
5015       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5016       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5017                   == TYPE_VECTOR_SUBPARTS (vectype_in));
5018     }
5019   else
5020     {
5021       vec_num = 1;
5022       vec_oprnds0 = VEC_alloc (tree, heap, 1);
5023       if (op_type == ternary_op)
5024         vec_oprnds1 = VEC_alloc (tree, heap, 1);
5025     }
5026
5027   phis = VEC_alloc (gimple, heap, vec_num);
5028   vect_defs = VEC_alloc (tree, heap, vec_num);
       /* In the non-SLP case VECT_DEFS holds a single slot that is overwritten
          with VEC_replace as copies are generated.  */
5029   if (!slp_node)
5030     VEC_quick_push (tree, vect_defs, NULL_TREE);
5031
5032   for (j = 0; j < ncopies; j++)
5033     {
           /* New phis are created for the first copy, and for every copy when
              each copy has its own def-use cycle.  */
5034       if (j == 0 || !single_defuse_cycle)
5035         {
5036           for (i = 0; i < vec_num; i++)
5037             {
5038               /* Create the reduction-phi that defines the reduction
5039                  operand.  */
5040               new_phi = create_phi_node (vec_dest, loop->header);
5041               set_vinfo_for_stmt (new_phi,
5042                                   new_stmt_vec_info (new_phi, loop_vinfo,
5043                                                      NULL));
5044                if (j == 0 || slp_node)
5045                  VEC_quick_push (gimple, phis, new_phi);
5046             }
5047         }
5048
5049       if (code == COND_EXPR)
5050         {
5051           gcc_assert (!slp_node);
5052           vectorizable_condition (stmt, gsi, vec_stmt,
5053                                   PHI_RESULT (VEC_index (gimple, phis, 0)),
5054                                   reduc_index, NULL);
5055           /* Multiple types are not supported for condition.  */
5056           break;
5057         }
5058
5059       /* Handle uses.  */
5060       if (j == 0)
5061         {
               /* !REDUC_INDEX is 1 when the reduction variable is OPS[0] and 0
                  otherwise, so OP0 always names a non-reduction operand.  */
5062           op0 = ops[!reduc_index];
5063           if (op_type == ternary_op)
5064             {
                   /* NOTE(review): when REDUC_INDEX is 1 the else branch picks
                      OPS[1], i.e. the reduction operand itself -- confirm that
                      this case cannot occur for ternary operations.  */
5065               if (reduc_index == 0)
5066                 op1 = ops[2];
5067               else
5068                 op1 = ops[1];
5069             }
5070
5071           if (slp_node)
5072             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5073                                slp_node, -1);
5074           else
5075             {
5076               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5077                                                             stmt, NULL);
5078               VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
5079               if (op_type == ternary_op)
5080                {
5081                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5082                                                                NULL);
5083                  VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
5084                }
5085             }
5086         }
5087       else
5088         {
               /* Later copies: derive the operand defs from the defs used for
                  the previous copy.  */
5089           if (!slp_node)
5090             {
5091               enum vect_def_type dt;
5092               gimple dummy_stmt;
5093               tree dummy;
5094
5095               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5096                                   &dummy_stmt, &dummy, &dt);
5097               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5098                                                               loop_vec_def0);
5099               VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
5100               if (op_type == ternary_op)
5101                 {
5102                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5103                                       &dummy, &dt);
5104                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5105                                                                 loop_vec_def1);
5106                   VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
5107                 }
5108             }
5109
               /* With a single def-use cycle the reduction operand of this copy
                  is the result of the statement built for the previous copy.  */
5110           if (single_defuse_cycle)
5111             reduc_def = gimple_assign_lhs (new_stmt);
5112
5113           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5114         }
5115
5116       FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
5117         {
5118           if (slp_node)
5119             reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
5120           else
5121             {
5122               if (!single_defuse_cycle || j == 0)
5123                 reduc_def = PHI_RESULT (new_phi);
5124             }
5125
5126           def1 = ((op_type == ternary_op)
5127                   ? VEC_index (tree, vec_oprnds1, i) : NULL);
               /* Build the vector operation, placing REDUC_DEF at operand
                  position REDUC_INDEX.  */
5128           if (op_type == binary_op)
5129             {
5130               if (reduc_index == 0)
5131                 expr = build2 (code, vectype_out, reduc_def, def0);
5132               else
5133                 expr = build2 (code, vectype_out, def0, reduc_def);
5134             }
5135           else
5136             {
5137               if (reduc_index == 0)
5138                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5139               else
5140                 {
5141                   if (reduc_index == 1)
5142                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5143                   else
5144                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5145                 }
5146             }
5147
5148           new_stmt = gimple_build_assign (vec_dest, expr);
5149           new_temp = make_ssa_name (vec_dest, new_stmt);
5150           gimple_assign_set_lhs (new_stmt, new_temp);
5151           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5152
5153           if (slp_node)
5154             {
5155               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5156               VEC_quick_push (tree, vect_defs, new_temp);
5157             }
5158           else
5159             VEC_replace (tree, vect_defs, 0, new_temp);
5160         }
5161
5162       if (slp_node)
5163         continue;
5164
           /* Record the first copy as the vectorized stmt and chain each
              later copy to the previous one through STMT_VINFO_RELATED_STMT.  */
5165       if (j == 0)
5166         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5167       else
5168         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5169
5170       prev_stmt_info = vinfo_for_stmt (new_stmt);
5171       prev_phi_info = vinfo_for_stmt (new_phi);
5172     }
5173
5174   /* Finalize the reduction-phi (set its arguments) and create the
5175      epilog reduction code.  */
5176   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5177     {
5178       new_temp = gimple_assign_lhs (*vec_stmt);
5179       VEC_replace (tree, vect_defs, 0, new_temp);
5180     }
5181
5182   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5183                                     epilog_reduc_code, phis, reduc_index,
5184                                     double_reduc, slp_node);
5185
       /* NOTE(review): VECT_DEFS is allocated above but, unlike PHIS and the
          operand vectors, is not released here -- confirm whether
          vect_create_epilog_for_reduction takes ownership of it.  */
5186   VEC_free (gimple, heap, phis);
5187   VEC_free (tree, heap, vec_oprnds0);
5188   if (vec_oprnds1)
5189     VEC_free (tree, heap, vec_oprnds1);
5190
5191   return true;
5192 }
5193
5194 /* Function vect_min_worthwhile_factor.
5195
5196    For a loop where we could vectorize the operation indicated by CODE,
5197    return the minimum vectorization factor that makes it worthwhile
5198    to use generic vectors.  */
5199 int
5200 vect_min_worthwhile_factor (enum tree_code code)
5201 {
5202   switch (code)
5203     {
5204     case PLUS_EXPR:
5205     case MINUS_EXPR:
5206     case NEGATE_EXPR:
5207       return 4;
5208
5209     case BIT_AND_EXPR:
5210     case BIT_IOR_EXPR:
5211     case BIT_XOR_EXPR:
5212     case BIT_NOT_EXPR:
5213       return 2;
5214
5215     default:
5216       return INT_MAX;
5217     }
5218 }
5219
5220
5221 /* Function vectorizable_induction
5222
5223    Check if PHI performs an induction computation that can be vectorized.
5224    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5225    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5226    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5227
5228 bool
5229 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5230                         gimple *vec_stmt)
5231 {
5232   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5233   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5234   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5235   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5236   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5237   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5238   tree vec_def;
5239
5240   gcc_assert (ncopies >= 1);
5241   /* FORNOW. These restrictions should be relaxed.  */
5242   if (nested_in_vect_loop_p (loop, phi))
5243     {
5244       imm_use_iterator imm_iter;
5245       use_operand_p use_p;
5246       gimple exit_phi;
5247       edge latch_e;
5248       tree loop_arg;
5249
5250       if (ncopies > 1)
5251         {
5252           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
5253             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5254                              "multiple types in nested loop.");
5255           return false;
5256         }
5257
5258       exit_phi = NULL;
5259       latch_e = loop_latch_edge (loop->inner);
5260       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5261       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5262         {
5263           if (!flow_bb_inside_loop_p (loop->inner,
5264                                       gimple_bb (USE_STMT (use_p))))
5265             {
5266               exit_phi = USE_STMT (use_p);
5267               break;
5268             }
5269         }
5270       if (exit_phi)
5271         {
5272           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5273           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5274                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5275             {
5276               if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
5277                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5278                                  "inner-loop induction only used outside "
5279                                  "of the outer vectorized loop.");
5280               return false;
5281             }
5282         }
5283     }
5284
5285   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5286     return false;
5287
5288   /* FORNOW: SLP not supported.  */
5289   if (STMT_SLP_TYPE (stmt_info))
5290     return false;
5291
5292   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5293
5294   if (gimple_code (phi) != GIMPLE_PHI)
5295     return false;
5296
5297   if (!vec_stmt) /* transformation not required.  */
5298     {
5299       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5300       if (dump_kind_p (MSG_NOTE))
5301         dump_printf_loc (MSG_NOTE, vect_location,
5302                          "=== vectorizable_induction ===");
5303       vect_model_induction_cost (stmt_info, ncopies);
5304       return true;
5305     }
5306
5307   /** Transform.  **/
5308
5309   if (dump_kind_p (MSG_NOTE))
5310     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.");
5311
5312   vec_def = get_initial_def_for_induction (phi);
5313   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5314   return true;
5315 }
5316
5317 /* Function vectorizable_live_operation.
5318
5319    STMT computes a value that is used outside the loop.  Check if
5320    it can be supported.  */
5321
5322 bool
5323 vectorizable_live_operation (gimple stmt,
5324                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5325                              gimple *vec_stmt ATTRIBUTE_UNUSED)
5326 {
5327   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5328   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5329   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5330   int i;
5331   int op_type;
5332   tree op;
5333   tree def;
5334   gimple def_stmt;
5335   enum vect_def_type dt;
5336   enum tree_code code;
5337   enum gimple_rhs_class rhs_class;
5338
5339   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5340
5341   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5342     return false;
5343
5344   if (!is_gimple_assign (stmt))
5345     return false;
5346
5347   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5348     return false;
5349
5350   /* FORNOW. CHECKME. */
5351   if (nested_in_vect_loop_p (loop, stmt))
5352     return false;
5353
5354   code = gimple_assign_rhs_code (stmt);
5355   op_type = TREE_CODE_LENGTH (code);
5356   rhs_class = get_gimple_rhs_class (code);
5357   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5358   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5359
5360   /* FORNOW: support only if all uses are invariant.  This means
5361      that the scalar operations can remain in place, unvectorized.
5362      The original last scalar value that they compute will be used.  */
5363
5364   for (i = 0; i < op_type; i++)
5365     {
5366       if (rhs_class == GIMPLE_SINGLE_RHS)
5367         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5368       else
5369         op = gimple_op (stmt, i + 1);
5370       if (op
5371           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5372                                   &dt))
5373         {
5374           if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
5375             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5376                              "use not simple.");
5377           return false;
5378         }
5379
5380       if (dt != vect_external_def && dt != vect_constant_def)
5381         return false;
5382     }
5383
5384   /* No transformation is required for the cases we currently support.  */
5385   return true;
5386 }
5387
5388 /* Reset the debug binds outside LOOP that use an SSA name defined by STMT.
5389    STMT may be a PHI or an ordinary statement; each of its definitions is
5390    examined in turn.  Uses by non-debug statements, and debug uses located
5391    inside LOOP, are left untouched.  */
5392
5393 static void
5394 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5395 {
5396   ssa_op_iter def_iter;
5397   imm_use_iterator use_iter;
5398   def_operand_p defp;
5399   gimple use_stmt;
5400
5401   FOR_EACH_PHI_OR_STMT_DEF (defp, stmt, def_iter, SSA_OP_DEF)
5402     {
5403       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (defp))
5404         {
5405           /* Only debug stmts that sit outside LOOP are of interest.  */
5406           if (!is_gimple_debug (use_stmt)
5407               || flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
5408             continue;
5409
5410           /* Debug binds are the only kind of debug stmt expected at this
5411              point; anything else runs into gcc_unreachable.  */
5412           if (!gimple_debug_bind_p (use_stmt))
5413             gcc_unreachable ();
5414
5415           if (dump_kind_p (MSG_NOTE))
5416             dump_printf_loc (MSG_NOTE, vect_location,
5417                              "killing debug use");
5418
5419           /* Reset the value bound by the debug stmt, then call
5420              update_stmt on the modified stmt.  */
5421           gimple_debug_bind_reset_value (use_stmt);
5422           update_stmt (use_stmt);
5423         }
5424     }
5425 }
5426
5427 /* Function vect_transform_loop.
5428
5429    The analysis phase has determined that the loop is vectorizable.
5430    Vectorize the loop - create vectorized stmts to replace the scalar
5431    stmts in the loop, and update the loop exit condition.

        LOOP_VINFO describes the loop to be transformed.  In order, this
        function: decides whether a profitability check still has to be
        requested; peels for alignment and/or versions the loop when
        LOOP_VINFO asks for it; peels for the loop bound or builds the
        constant RATIO (niters / VF); transforms the PHIs and stmts of each
        basic block of the loop; and finally passes RATIO to
        slpeel_make_loop_iterate_ntimes and updates SSA form.  */
5432
5433 void
5434 vect_transform_loop (loop_vec_info loop_vinfo)
5435 {
5436   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5437   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5438   int nbbs = loop->num_nodes;
5439   gimple_stmt_iterator si;
5440   int i;
5441   tree ratio = NULL;
5442   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5443   bool grouped_store;
5444   bool slp_scheduled = false;
5445   unsigned int nunits;
5446   gimple stmt, pattern_stmt;
5447   gimple_seq pattern_def_seq = NULL;
5448   gimple_stmt_iterator pattern_def_si = gsi_none ();
5449   bool transform_pattern_stmt = false;
5450   bool check_profitability = false;
5451   int th;
5452
5453   if (dump_kind_p (MSG_NOTE))
5454     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
5455
5456   /* Use the more conservative vectorization threshold.  If the number
5457      of iterations is constant assume the cost check has been performed
5458      by our caller.  If the threshold makes all loops profitable that
5459      run at least the vectorization factor number of times checking
5460      is pointless, too.  */
5461   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5462          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5463   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5464   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5465       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5466     {
5467       if (dump_kind_p (MSG_NOTE))
5468         dump_printf_loc (MSG_NOTE, vect_location,
5469                          "Profitability threshold is %d loop iterations.", th);
5470       check_profitability = true;
5471     }
5472
5473   /* Peel the loop if there are data refs with unknown alignment.
5474      Only one data ref with unknown store is allowed.  */
5475
5476   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5477     {
5478       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
           /* The request was passed on above; do not repeat it below.  */
5479       check_profitability = false;
5480     }
5481   /* Version the loop if versioning for alignment or alias is required.  */
5482   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5483       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5484     {
5485       vect_loop_versioning (loop_vinfo, th, check_profitability);
           /* Likewise: the request has been passed on.  */
5486       check_profitability = false;
5487     }
5488
5489   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5490      compile time constant), or it is a constant that doesn't divide by the
5491      vectorization factor, then an epilog loop needs to be created.
5492      We therefore duplicate the loop: the original loop will be vectorized,
5493      and will compute the first (n/VF) iterations.  The second copy of the loop
5494      will remain scalar and will compute the remaining (n%VF) iterations.
5495      (VF is the vectorization factor).  The same peeling is also done when
          LOOP_VINFO requests peeling for gaps.  Otherwise RATIO is simply the
          constant n/VF.  */
5496
5497   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5498        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5499            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5500        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5501     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5502                                     th, check_profitability);
5503   else
5504     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5505                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5506
5507   /* 1) Make sure the loop header has exactly two entries
5508      2) Make sure we have a preheader basic block.  */
5509
5510   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5511
5512   split_edge (loop_preheader_edge (loop));
5513
5514   /* FORNOW: the vectorizer supports only loops whose body consists
5515      of one basic block (header + empty latch).  When the vectorizer
5516      supports more involved loop forms, the order in which the BBs are
5517      traversed needs to be reconsidered.  */
5518
5519   for (i = 0; i < nbbs; i++)
5520     {
5521       basic_block bb = bbs[i];
5522       stmt_vec_info stmt_info;
5523       gimple phi;
5524       /* Transform the PHIs of BB; only induction PHIs are transformed.  */
5525       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5526         {
5527           phi = gsi_stmt (si);
5528           if (dump_kind_p (MSG_NOTE))
5529             {
5530               dump_printf_loc (MSG_NOTE, vect_location,
5531                                "------>vectorizing phi: ");
5532               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5533             }
5534           stmt_info = vinfo_for_stmt (phi);
5535           if (!stmt_info)
5536             continue;
5537
5538           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5539             vect_loop_kill_debug_uses (loop, phi);
5540
5541           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5542               && !STMT_VINFO_LIVE_P (stmt_info))
5543             continue;
5544
5545           if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5546                 != (unsigned HOST_WIDE_INT) vectorization_factor)
5547               && dump_kind_p (MSG_NOTE))
5548             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.");
5549
5550           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5551             {
5552               if (dump_kind_p (MSG_NOTE))
5553                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.");
5554               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5555             }
5556         }
5557       /* Now the stmts of BB.  SI is advanced explicitly in the loop body:
              while a pattern stmt or one of its pattern def stmts is still
              pending, the same position is visited again.  */
5558       pattern_stmt = NULL;
5559       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5560         {
5561           bool is_store;
5562           /* Pick up a pattern stmt left pending by the last iteration.  */
5563           if (transform_pattern_stmt)
5564             stmt = pattern_stmt;
5565           else
5566             stmt = gsi_stmt (si);
5567
5568           if (dump_kind_p (MSG_NOTE))
5569             {
5570               dump_printf_loc (MSG_NOTE, vect_location,
5571                                "------>vectorizing statement: ");
5572               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5573             }
5574
5575           stmt_info = vinfo_for_stmt (stmt);
5576
5577           /* vector stmts created in the outer-loop during vectorization of
5578              stmts in an inner-loop may not have a stmt_info, and do not
5579              need to be vectorized.  */
5580           if (!stmt_info)
5581             {
5582               gsi_next (&si);
5583               continue;
5584             }
5585
5586           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5587             vect_loop_kill_debug_uses (loop, stmt);
5588           /* If STMT is neither relevant nor live, vectorize its related
                  pattern stmt instead when that one is relevant or live;
                  otherwise skip STMT.  If STMT is to be vectorized and also
                  has such a pattern stmt, handle STMT now and the pattern stmt
                  on the next iteration (TRANSFORM_PATTERN_STMT).  */
5589           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5590               && !STMT_VINFO_LIVE_P (stmt_info))
5591             {
5592               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5593                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5594                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5595                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5596                 {
5597                   stmt = pattern_stmt;
5598                   stmt_info = vinfo_for_stmt (stmt);
5599                 }
5600               else
5601                 {
5602                   gsi_next (&si);
5603                   continue;
5604                 }
5605             }
5606           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5607                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5608                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5609                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5610             transform_pattern_stmt = true;
5611
5612           /* If pattern statement has def stmts, vectorize them too.  Each
                  visit handles at most one relevant or live def stmt of the
                  sequence; once the sequence is exhausted (or there is none),
                  the pattern stmt itself is the one transformed below.  */
5613           if (is_pattern_stmt_p (stmt_info))
5614             {
5615               if (pattern_def_seq == NULL)
5616                 {
5617                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5618                   pattern_def_si = gsi_start (pattern_def_seq);
5619                 }
5620               else if (!gsi_end_p (pattern_def_si))
5621                 gsi_next (&pattern_def_si);
5622               if (pattern_def_seq != NULL)
5623                 {
5624                   gimple pattern_def_stmt = NULL;
5625                   stmt_vec_info pattern_def_stmt_info = NULL;
5626                   /* Find the next def stmt that is relevant or live.  */
5627                   while (!gsi_end_p (pattern_def_si))
5628                     {
5629                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5630                       pattern_def_stmt_info
5631                         = vinfo_for_stmt (pattern_def_stmt);
5632                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5633                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5634                         break;
5635                       gsi_next (&pattern_def_si);
5636                     }
5637
5638                   if (!gsi_end_p (pattern_def_si))
5639                     {
5640                       if (dump_kind_p (MSG_NOTE))
5641                         {
5642                           dump_printf_loc (MSG_NOTE, vect_location,
5643                                            "==> vectorizing pattern def "
5644                                            "stmt: ");
5645                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5646                                             pattern_def_stmt, 0);
5647                         }
5648
5649                       stmt = pattern_def_stmt;
5650                       stmt_info = pattern_def_stmt_info;
5651                     }
5652                   else
5653                     {
5654                       pattern_def_si = gsi_none ();
5655                       transform_pattern_stmt = false;
5656                     }
5657                 }
5658               else
5659                 transform_pattern_stmt = false;
5660             }
5661
5662           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5663           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5664                                                STMT_VINFO_VECTYPE (stmt_info));
5665           if (!STMT_SLP_TYPE (stmt_info)
5666               && nunits != (unsigned int) vectorization_factor
5667               && dump_kind_p (MSG_NOTE))
5668             /* For SLP VF is set according to unrolling factor, and not to
5669                vector size, hence for SLP this print is not valid.  */
5670             dump_printf_loc (MSG_NOTE, vect_location,
5671                              "multiple-types.");
5672
5673           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5674              reached.  */
5675           if (STMT_SLP_TYPE (stmt_info))
5676             {
5677               if (!slp_scheduled)
5678                 {
5679                   slp_scheduled = true;
5680
5681                   if (dump_kind_p (MSG_NOTE))
5682                     dump_printf_loc (MSG_NOTE, vect_location,
5683                                      "=== scheduling SLP instances ===");
5684
5685                   vect_schedule_slp (loop_vinfo, NULL);
5686                 }
5687
5688               /* Hybrid SLP stmts must be vectorized in addition to SLP;
                      pure SLP stmts are not transformed individually here.  */
5689               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5690                 {
5691                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5692                     {
5693                       pattern_def_seq = NULL;
5694                       gsi_next (&si);
5695                     }
5696                   continue;
5697                 }
5698             }
5699
5700           /* -------- vectorize statement ------------ */
5701           if (dump_kind_p (MSG_NOTE))
5702             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.");
5703
5704           grouped_store = false;
5705           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5706           if (is_store)
5707             {
5708               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5709                 {
5710                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5711                      interleaving chain was completed - free all the stores in
5712                      the chain.  */
5713                   gsi_next (&si);
5714                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5715                   continue;
5716                 }
5717               else
5718                 {
5719                   /* Free the attached stmt_vec_info and remove the stmt.
                          SI is not advanced explicitly here - presumably
                          gsi_remove already leaves it on the next stmt.  */
5720                   gimple store = gsi_stmt (si);
5721                   free_stmt_vec_info (store);
5722                   unlink_stmt_vdef (store);
5723                   gsi_remove (&si, true);
5724                   release_defs (store);
5725                   continue;
5726                 }
5727             }
5728           /* Advance SI unless pattern work is still pending here.  */
5729           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5730             {
5731               pattern_def_seq = NULL;
5732               gsi_next (&si);
5733             }
5734         }                       /* stmts in BB */
5735     }                           /* BBs in loop */
5736
5737   slpeel_make_loop_iterate_ntimes (loop, ratio);
5738
5739   /* The memory tags and pointers in vectorized statements need to
5740      have their SSA forms updated.  FIXME, why can't this be delayed
5741      until all the loops have been transformed?  */
5742   update_ssa (TODO_update_ssa);
5743
5744   if (dump_kind_p (MSG_OPTIMIZED_LOCATIONS))
5745     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, "LOOP VECTORIZED.");
5746   if (loop->inner && dump_kind_p (MSG_OPTIMIZED_LOCATIONS))
5747     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
5748                      "OUTER LOOP VECTORIZED.");
5749 }