gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "tm.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "basic-block.h"
  30 #include "gimple-pretty-print.h"
  31 #include "tree-flow.h"
  32 #include "tree-pass.h"
  33 #include "cfgloop.h"
  34 #include "expr.h"
  35 #include "recog.h"
  36 #include "optabs.h"
  37 #include "params.h"
  38 #include "diagnostic-core.h"
  39 #include "tree-chrec.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "tree-vectorizer.h"
  42 #include "target.h"
  43
  44 /* Loop Vectorization Pass.
  45
  46    This pass tries to vectorize loops.
  47
  48    For example, the vectorizer transforms the following simple loop:
  49
  50         short a[N]; short b[N]; short c[N]; int i;
  51
  52         for (i=0; i<N; i++){
  53           a[i] = b[i] + c[i];
  54         }
  55
  56    as if it was manually vectorized by rewriting the source code into:
  57
  58         typedef int __attribute__((mode(V8HI))) v8hi;
  59         short a[N];  short b[N]; short c[N];   int i;
  60         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  61         v8hi va, vb, vc;
  62
  63         for (i=0; i<N/8; i++){
  64           vb = pb[i];
  65           vc = pc[i];
  66           va = vb + vc;
  67           pa[i] = va;
  68         }
  69
  70         The main entry to this pass is vectorize_loops(), in which
  71    the vectorizer applies a set of analyses on a given set of loops,
  72    followed by the actual vectorization transformation for the loops that
  73    had successfully passed the analysis phase.
  74         Throughout this pass we make a distinction between two types of
  75    data: scalars (which are represented by SSA_NAMES), and memory references
  76    ("data-refs").  These two types of data require different handling both
  77    during analysis and transformation. The types of data-refs that the
   78    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  79    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  80    accesses are required to have a simple (consecutive) access pattern.
  81
  82    Analysis phase:
  83    ===============
  84         The driver for the analysis phase is vect_analyze_loop().
  85    It applies a set of analyses, some of which rely on the scalar evolution
  86    analyzer (scev) developed by Sebastian Pop.
  87
  88         During the analysis phase the vectorizer records some information
  89    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  90    loop, as well as general information about the loop as a whole, which is
  91    recorded in a "loop_vec_info" struct attached to each loop.
  92
  93    Transformation phase:
  94    =====================
  95         The loop transformation phase scans all the stmts in the loop, and
  96    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  97    the loop that needs to be vectorized.  It inserts the vector code sequence
  98    just before the scalar stmt S, and records a pointer to the vector code
  99    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 100    attached to S).  This pointer will be used for the vectorization of following
 101    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 102    otherwise, we rely on dead code elimination for removing it.
 103
 104         For example, say stmt S1 was vectorized into stmt VS1:
 105
 106    VS1: vb = px[i];
 107    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 108    S2:  a = b;
 109
 110    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 111    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 112    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 113    resulting sequence would be:
 114
 115    VS1: vb = px[i];
 116    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 117    VS2: va = vb;
 118    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 119
  120         Operands that are not SSA_NAMEs are data-refs that appear in
 121    load/store operations (like 'x[i]' in S1), and are handled differently.
 122
 123    Target modeling:
 124    =================
 125         Currently the only target specific information that is used is the
 126    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 127    Targets that can support different sizes of vectors, for now will need
 128    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 129    flexibility will be added in the future.
 130
  131         Since we only vectorize operations whose vector form can be
  132    expressed using existing tree codes, to verify that an operation is
  133    supported, the vectorizer checks the relevant optab at the relevant
  134    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 135    the value found is CODE_FOR_nothing, then there's no target support, and
 136    we can't vectorize the stmt.
 137
 138    For additional information on this project see:
 139    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 140 */
 141
 142 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 143
  144 /* Function vect_determine_vectorization_factor
  145
  146    Determine the vectorization factor (VF).  VF is the number of data elements
  147    that are operated upon in parallel in a single iteration of the vectorized
  148    loop.  For example, when vectorizing a loop that operates on 4byte elements,
  149    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
  150    elements can fit in a single vector register.
  151
  152    We currently support vectorization of loops in which all types operated upon
  153    are of the same size.  Therefore this function currently sets VF according to
  154    the size of the types operated upon, and fails if there are multiple sizes
  155    in the loop.

         NOTE: as implemented below, VF is the largest number of vector
         subparts (TYPE_VECTOR_SUBPARTS) found over the relevant PHIs and the
         relevant or live statements of the loop (pattern statements and their
         pattern def stmts included).  What is rejected is a statement whose
         own vector type and the vector type of its smallest scalar type
         differ in mode size.

         LOOP_VINFO describes the loop being analyzed.  On success the
         function has recorded a vector type in STMT_VINFO_VECTYPE for every
         examined PHI and statement that lacked one, stores VF in
         LOOP_VINFO_VECT_FACTOR (LOOP_VINFO) and returns true.  It returns
         false when a statement has no lhs, when the type of a statement
         already has a vector mode, when no vector type is available for a
         scalar type, on the size mismatch described above, or when the
         resulting VF is <= 1.
  156
  157    VF is also the factor by which the loop iterations are strip-mined, e.g.:
  158    original loop:
  159         for (i=0; i<N; i++){
  160           a[i] = b[i] + c[i];
  161         }
  162
  163    vectorized loop:
  164         for (i=0; i<N; i+=VF){
  165           a[i:VF] = b[i:VF] + c[i:VF];
  166         }
  167 */
  168
  169 static bool
  170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
  171 {
  172   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  173   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  174   int nbbs = loop->num_nodes;
  175   gimple_stmt_iterator si;
  176   unsigned int vectorization_factor = 0;
  177   tree scalar_type;
  178   gimple phi;
  179   tree vectype;
  180   unsigned int nunits;
  181   stmt_vec_info stmt_info;
  182   int i;
        /* Receives both outputs of vect_get_smallest_scalar_type; the values
           are never read in this function.  */
  183   HOST_WIDE_INT dummy;
        /* The four variables below carry state across iterations of the
           statement loop while a pattern statement and its pattern def
           sequence are being examined.  */
  184   gimple stmt, pattern_stmt = NULL;
  185   gimple_seq pattern_def_seq = NULL;
  186   gimple_stmt_iterator pattern_def_si = gsi_none ();
  187   bool analyze_pattern_stmt = false;
  188
  189   if (dump_enabled_p ())
  190     dump_printf_loc (MSG_NOTE, vect_location,
  191                      "=== vect_determine_vectorization_factor ===");
  192
  193   for (i = 0; i < nbbs; i++)
  194     {
  195       basic_block bb = bbs[i];
  196
            /* PHIs: give every relevant PHI a vectype and fold its number of
               units into VECTORIZATION_FACTOR.  */
  197       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
  198         {
  199           phi = gsi_stmt (si);
  200           stmt_info = vinfo_for_stmt (phi);
  201           if (dump_enabled_p ())
  202             {
  203               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
  204               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
  205             }
  206
  207           gcc_assert (stmt_info);
  208
  209           if (STMT_VINFO_RELEVANT_P (stmt_info))
  210             {
  211               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
  212               scalar_type = TREE_TYPE (PHI_RESULT (phi));
  213
  214               if (dump_enabled_p ())
  215                 {
  216                   dump_printf_loc (MSG_NOTE, vect_location,
  217                                    "get vectype for scalar type:  ");
  218                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  219                 }
  220
  221               vectype = get_vectype_for_scalar_type (scalar_type);
  222               if (!vectype)
  223                 {
  224                   if (dump_enabled_p ())
  225                     {
  226                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  227                                        "not vectorized: unsupported "
  228                                        "data-type ");
  229                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  230                                          scalar_type);
  231                     }
  232                   return false;
  233                 }
  234               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  235
  236               if (dump_enabled_p ())
  237                 {
  238                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  239                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
  240                 }
  241
  242               nunits = TYPE_VECTOR_SUBPARTS (vectype);
  243               if (dump_enabled_p ())
  244                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
  245
  246               if (!vectorization_factor
  247                   || (nunits > vectorization_factor))
  248                 vectorization_factor = nunits;
  249             }
  250         }
  251
            /* Statements.  SI is advanced by hand inside the body, and the
               loop keeps running while ANALYZE_PATTERN_STMT is set so that a
               pending pattern statement is still examined after SI has
               reached the end of BB.  */
  252       for (si = gsi_start_bb (bb); !gsi_end_p (si) || analyze_pattern_stmt;)
  253         {
  254           tree vf_vectype;
  255
  256           if (analyze_pattern_stmt)
  257             stmt = pattern_stmt;
  258           else
  259             stmt = gsi_stmt (si);
  260
  261           stmt_info = vinfo_for_stmt (stmt);
  262
  263           if (dump_enabled_p ())
  264             {
  265               dump_printf_loc (MSG_NOTE, vect_location,
  266                                "==> examining statement: ");
  267               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  268             }
  269
  270           gcc_assert (stmt_info);
  271
  272           /* Skip stmts which do not need to be vectorized.  */
  273           if (!STMT_VINFO_RELEVANT_P (stmt_info)
  274               && !STMT_VINFO_LIVE_P (stmt_info))
  275             {
                    /* The stmt itself is not needed, but a relevant or live
                       pattern stmt related to it is examined in its place.  */
  276               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  277                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  278                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  279                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
  280                 {
  281                   stmt = pattern_stmt;
  282                   stmt_info = vinfo_for_stmt (pattern_stmt);
  283                   if (dump_enabled_p ())
  284                     {
  285                       dump_printf_loc (MSG_NOTE, vect_location,
  286                                        "==> examining pattern statement: ");
  287                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
  288                     }
  289                 }
  290               else
  291                 {
  292                   if (dump_enabled_p ())
  293                     dump_printf_loc (MSG_NOTE, vect_location, "skip.");
  294                   gsi_next (&si);
  295                   continue;
  296                 }
  297             }
  298           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
  299                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
  300                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
  301                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                  /* Both the stmt and its pattern stmt are needed: examine the
                     stmt now and PATTERN_STMT on a following iteration; SI is
                     not advanced while this flag is set.  */
  302             analyze_pattern_stmt = true;
  303
  304           /* If a pattern statement has def stmts, analyze them too.  */
  305           if (is_pattern_stmt_p (stmt_info))
  306             {
                    /* No sequence in hand yet: fetch the one attached to the
                       pattern stmt.  Otherwise step past the def stmt that was
                       examined on the previous iteration.  */
  307               if (pattern_def_seq == NULL)
  308                 {
  309                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
  310                   pattern_def_si = gsi_start (pattern_def_seq);
  311                 }
  312               else if (!gsi_end_p (pattern_def_si))
  313                 gsi_next (&pattern_def_si);
  314               if (pattern_def_seq != NULL)
  315                 {
  316                   gimple pattern_def_stmt = NULL;
  317                   stmt_vec_info pattern_def_stmt_info = NULL;
  318
                        /* Skip def stmts that are neither relevant nor
                           live.  */
  319                   while (!gsi_end_p (pattern_def_si))
  320                     {
  321                       pattern_def_stmt = gsi_stmt (pattern_def_si);
  322                       pattern_def_stmt_info
  323                         = vinfo_for_stmt (pattern_def_stmt);
  324                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
  325                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
  326                         break;
  327                       gsi_next (&pattern_def_si);
  328                     }
  329
  330                   if (!gsi_end_p (pattern_def_si))
  331                     {
  332                       if (dump_enabled_p ())
  333                         {
  334                           dump_printf_loc (MSG_NOTE, vect_location,
  335                                            "==> examining pattern def stmt: ");
  336                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
  337                                             pattern_def_stmt, 0);
  338                         }
  339
                            /* This iteration examines the def stmt instead of
                               the pattern stmt.  */
  340                       stmt = pattern_def_stmt;
  341                       stmt_info = pattern_def_stmt_info;
  342                     }
  343                   else
  344                     {
                            /* No def stmt left: STMT stays as it was and is
                               examined on this iteration; the pending-pattern
                               state is cleared.  */
  345                       pattern_def_si = gsi_none ();
  346                       analyze_pattern_stmt = false;
  347                     }
  348                 }
  349               else
  350                 analyze_pattern_stmt = false;
  351             }
  352
  353           if (gimple_get_lhs (stmt) == NULL_TREE)
  354             {
  355               if (dump_enabled_p ())
  356                 {
  357                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  358                                    "not vectorized: irregular stmt.");
  359                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
  360                                     0);
  361                 }
  362               return false;
  363             }
  364
  365           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
  366             {
  367               if (dump_enabled_p ())
  368                 {
  369                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  370                                    "not vectorized: vector stmt in loop:");
  371                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
  372                 }
  373               return false;
  374             }
  375
  376           if (STMT_VINFO_VECTYPE (stmt_info))
  377             {
  378               /* The only case when a vectype had been already set is for stmts
  379                  that contain a dataref, or for "pattern-stmts" (stmts
  380                  generated by the vectorizer to represent/replace a certain
  381                  idiom).  */
  382               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
  383                           || is_pattern_stmt_p (stmt_info)
  384                           || !gsi_end_p (pattern_def_si));
  385               vectype = STMT_VINFO_VECTYPE (stmt_info);
  386             }
  387           else
  388             {
                    /* No vectype yet: derive one from the type of the lhs and
                       record it on the stmt.  */
  389               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
  390               scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
  391               if (dump_enabled_p ())
  392                 {
  393                   dump_printf_loc (MSG_NOTE, vect_location,
  394                                    "get vectype for scalar type:  ");
  395                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  396                 }
  397               vectype = get_vectype_for_scalar_type (scalar_type);
  398               if (!vectype)
  399                 {
  400                   if (dump_enabled_p ())
  401                     {
  402                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  403                                        "not vectorized: unsupported "
  404                                        "data-type ");
  405                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  406                                          scalar_type);
  407                     }
  408                   return false;
  409                 }
  410
  411               STMT_VINFO_VECTYPE (stmt_info) = vectype;
  412             }
  413
  414           /* The vectorization factor is according to the smallest
  415              scalar type (or the largest vector size, but we only
  416              support one vector size per loop).  */
  417           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
  418                                                        &dummy);
  419           if (dump_enabled_p ())
  420             {
  421               dump_printf_loc (MSG_NOTE, vect_location,
  422                                "get vectype for scalar type:  ");
  423               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
  424             }
  425           vf_vectype = get_vectype_for_scalar_type (scalar_type);
  426           if (!vf_vectype)
  427             {
  428               if (dump_enabled_p ())
  429                 {
  430                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  431                                    "not vectorized: unsupported data-type ");
  432                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  433                                      scalar_type);
  434                 }
  435               return false;
  436             }
  437
                /* VECTYPE is the stmt's own vector type, VF_VECTYPE the one
                   derived from its smallest scalar type; the modes of the two
                   must have the same size.  */
  438           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
  439                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
  440             {
  441               if (dump_enabled_p ())
  442                 {
  443                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  444                                    "not vectorized: different sized vector "
  445                                    "types in statement, ");
  446                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  447                                      vectype);
  448                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
  449                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
  450                                      vf_vectype);
  451                 }
  452               return false;
  453             }
  454
  455           if (dump_enabled_p ())
  456             {
  457               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
  458               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
  459             }
  460
                /* Keep the largest number of units seen so far.  */
  461           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
  462           if (dump_enabled_p ())
  463             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d", nunits);
  464           if (!vectorization_factor
  465               || (nunits > vectorization_factor))
  466             vectorization_factor = nunits;
  467
                /* Move on to the next stmt of BB only when neither a pattern
                   stmt nor a further pattern def stmt is pending.  */
  468           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
  469             {
  470               pattern_def_seq = NULL;
  471               gsi_next (&si);
  472             }
  473         }
  474     }
  475
  476   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
  477   if (dump_enabled_p ())
  478     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d",
  479                      vectorization_factor);
        /* A factor of 0 (nothing contributed) or 1 is rejected.  */
  480   if (vectorization_factor <= 1)
  481     {
  482       if (dump_enabled_p ())
  483         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  484                          "not vectorized: unsupported data-type");
  485       return false;
  486     }
  487   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  488
  489   return true;
  490 }
 491
 492
 493 /* Function vect_is_simple_iv_evolution.
 494
 495    FORNOW: A simple evolution of an induction variables in the loop is
 496    considered a polynomial evolution with constant step.  */
 497
 498 static bool
 499 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 500                              tree * step)
 501 {
 502   tree init_expr;
 503   tree step_expr;
 504   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
 505
 506   /* When there is no evolution in this loop, the evolution function
 507      is not "simple".  */
 508   if (evolution_part == NULL_TREE)
 509     return false;
 510
 511   /* When the evolution is a polynomial of degree >= 2
 512      the evolution function is not "simple".  */
 513   if (tree_is_chrec (evolution_part))
 514     return false;
 515
 516   step_expr = evolution_part;
 517   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 518
 519   if (dump_enabled_p ())
 520     {
 521       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 522       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 523       dump_printf (MSG_NOTE, ",  init: ");
 524       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 525     }
 526
 527   *init = init_expr;
 528   *step = step_expr;
 529
 530   if (TREE_CODE (step_expr) != INTEGER_CST)
 531     {
 532       if (dump_enabled_p ())
 533         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 534                          "step unknown.");
 535       return false;
 536     }
 537
 538   return true;
 539 }
 540
 541 /* Function vect_analyze_scalar_cycles_1.
 542
 543    Examine the cross iteration def-use cycles of scalar variables
 544    in LOOP.  LOOP_VINFO represents the loop that is now being
 545    considered for vectorization (can be LOOP, or an outer-loop
 546    enclosing LOOP).  */
 547
 548 static void
 549 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 550 {
 551   basic_block bb = loop->header;
 552   tree dumy;
 553   vec<gimple> worklist;
 554   worklist.create (64);
 555   gimple_stmt_iterator gsi;
 556   bool double_reduc;
 557
 558   if (dump_enabled_p ())
 559     dump_printf_loc (MSG_NOTE, vect_location,
 560                      "=== vect_analyze_scalar_cycles ===");
 561
 562   /* First - identify all inductions.  Reduction detection assumes that all the
 563      inductions have been identified, therefore, this order must not be
 564      changed.  */
 565   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 566     {
 567       gimple phi = gsi_stmt (gsi);
 568       tree access_fn = NULL;
 569       tree def = PHI_RESULT (phi);
 570       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 571
 572       if (dump_enabled_p ())
 573         {
 574           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 575           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 576         }
 577
 578       /* Skip virtual phi's.  The data dependences that are associated with
 579          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 580       if (virtual_operand_p (def))
 581         continue;
 582
 583       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 584
 585       /* Analyze the evolution function.  */
 586       access_fn = analyze_scalar_evolution (loop, def);
 587       if (access_fn)
 588         {
 589           STRIP_NOPS (access_fn);
 590           if (dump_enabled_p ())
 591             {
 592               dump_printf_loc (MSG_NOTE, vect_location,
 593                                "Access function of PHI: ");
 594               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 595             }
 596           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 597             = evolution_part_in_loop_num (access_fn, loop->num);
 598         }
 599
 600       if (!access_fn
 601           || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
 602         {
 603           worklist.safe_push (phi);
 604           continue;
 605         }
 606
 607       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 608
 609       if (dump_enabled_p ())
 610         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.");
 611       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 612     }
 613
 614
 615   /* Second - identify all reductions and nested cycles.  */
 616   while (worklist.length () > 0)
 617     {
 618       gimple phi = worklist.pop ();
 619       tree def = PHI_RESULT (phi);
 620       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 621       gimple reduc_stmt;
 622       bool nested_cycle;
 623
 624       if (dump_enabled_p ())
 625         {
 626           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 627           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 628         }
 629
 630       gcc_assert (!virtual_operand_p (def)
 631                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 632
 633       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 634       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 635                                                 &double_reduc);
 636       if (reduc_stmt)
 637         {
 638           if (double_reduc)
 639             {
 640               if (dump_enabled_p ())
 641                 dump_printf_loc (MSG_NOTE, vect_location,
 642                                  "Detected double reduction.");
 643
 644               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 645               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 646                                                     vect_double_reduction_def;
 647             }
 648           else
 649             {
 650               if (nested_cycle)
 651                 {
 652                   if (dump_enabled_p ())
 653                     dump_printf_loc (MSG_NOTE, vect_location,
 654                                      "Detected vectorizable nested cycle.");
 655
 656                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 657                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 658                                                              vect_nested_cycle;
 659                 }
 660               else
 661                 {
 662                   if (dump_enabled_p ())
 663                     dump_printf_loc (MSG_NOTE, vect_location,
 664                                      "Detected reduction.");
 665
 666                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 667                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 668                                                            vect_reduction_def;
 669                   /* Store the reduction cycles for possible vectorization in
 670                      loop-aware SLP.  */
 671                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 672                 }
 673             }
 674         }
 675       else
 676         if (dump_enabled_p ())
 677           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 678                            "Unknown def-use cycle pattern.");
 679     }
 680
 681   worklist.release ();
 682 }
 683
 684
 685 /* Function vect_analyze_scalar_cycles.
 686
 687    Examine the cross iteration def-use cycles of scalar variables, by
 688    analyzing the loop-header PHIs of scalar variables.  Classify each
 689    cycle as one of the following: invariant, induction, reduction, unknown.
 690    We do that for the loop represented by LOOP_VINFO, and also to its
 691    inner-loop, if exists.
 692    Examples for scalar cycles:
 693
 694    Example1: reduction:
 695
 696               loop1:
 697               for (i=0; i<N; i++)
 698                  sum += a[i];
 699
 700    Example2: induction:
 701
 702               loop2:
 703               for (i=0; i<N; i++)
 704                  a[i] = i;  */
 705
 706 static void
 707 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 708 {
 709   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 710
 711   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 712
 713   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 714      Reductions in such inner-loop therefore have different properties than
 715      the reductions in the nest that gets vectorized:
 716      1. When vectorized, they are executed in the same order as in the original
 717         scalar loop, so we can't change the order of computation when
 718         vectorizing them.
 719      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 720         current checks are too strict.  */
 721
 722   if (loop->inner)
 723     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 724 }
 725
 726 /* Function vect_get_loop_niters.
 727
 728    Determine how many iterations the loop is executed.
 729    If an expression that represents the number of iterations
 730    can be constructed, place it in NUMBER_OF_ITERATIONS.
 731    Return the loop exit condition.  */
 732
 733 static gimple
 734 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 735 {
 736   tree niters;
 737
 738   if (dump_enabled_p ())
 739     dump_printf_loc (MSG_NOTE, vect_location,
 740                      "=== get_loop_niters ===");
 741   niters = number_of_exit_cond_executions (loop);
 742
 743   if (niters != NULL_TREE
 744       && niters != chrec_dont_know)
 745     {
 746       *number_of_iterations = niters;
 747
 748       if (dump_enabled_p ())
 749         {
 750           dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
 751           dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
 752         }
 753     }
 754
 755   return get_loop_exit_condition (loop);
 756 }
 757
 758
 759 /* Function bb_in_loop_p
 760
 761    Used as predicate for dfs order traversal of the loop bbs.  */
 762
 763 static bool
 764 bb_in_loop_p (const_basic_block bb, const void *data)
 765 {
 766   const struct loop *const loop = (const struct loop *)data;
 767   if (flow_bb_inside_loop_p (loop, bb))
 768     return true;
 769   return false;
 770 }
 771
 772
 773 /* Function new_loop_vec_info.
 774
 775    Create and initialize a new loop_vec_info struct for LOOP, as well as
 776    stmt_vec_info structs for all the stmts in LOOP.  */
 777
 778 static loop_vec_info
 779 new_loop_vec_info (struct loop *loop)
 780 {
 781   loop_vec_info res;
 782   basic_block *bbs;
 783   gimple_stmt_iterator si;
 784   unsigned int i, nbbs;
 785
 786   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 787   LOOP_VINFO_LOOP (res) = loop;
 788
 789   bbs = get_loop_body (loop);
 790
 791   /* Create/Update stmt_info for all stmts in the loop.  */
 792   for (i = 0; i < loop->num_nodes; i++)
 793     {
 794       basic_block bb = bbs[i];
 795
 796       /* BBs in a nested inner-loop will have been already processed (because
 797          we will have called vect_analyze_loop_form for any nested inner-loop).
 798          Therefore, for stmts in an inner-loop we just want to update the
 799          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 800          loop_info of the outer-loop we are currently considering to vectorize
 801          (instead of the loop_info of the inner-loop).
 802          For stmts in other BBs we need to create a stmt_info from scratch.  */
 803       if (bb->loop_father != loop)
 804         {
 805           /* Inner-loop bb.  */
 806           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 807           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 808             {
 809               gimple phi = gsi_stmt (si);
 810               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 811               loop_vec_info inner_loop_vinfo =
 812                 STMT_VINFO_LOOP_VINFO (stmt_info);
 813               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 814               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 815             }
 816           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 817            {
 818               gimple stmt = gsi_stmt (si);
 819               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 820               loop_vec_info inner_loop_vinfo =
 821                  STMT_VINFO_LOOP_VINFO (stmt_info);
 822               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 823               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 824            }
 825         }
 826       else
 827         {
 828           /* bb in current nest.  */
 829           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 830             {
 831               gimple phi = gsi_stmt (si);
 832               gimple_set_uid (phi, 0);
 833               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 834             }
 835
 836           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 837             {
 838               gimple stmt = gsi_stmt (si);
 839               gimple_set_uid (stmt, 0);
 840               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 841             }
 842         }
 843     }
 844
 845   /* CHECKME: We want to visit all BBs before their successors (except for
 846      latch blocks, for which this assertion wouldn't hold).  In the simple
 847      case of the loop forms we allow, a dfs order of the BBs would the same
 848      as reversed postorder traversal, so we are safe.  */
 849
 850    free (bbs);
 851    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 852    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 853                               bbs, loop->num_nodes, loop);
 854    gcc_assert (nbbs == loop->num_nodes);
 855
 856   LOOP_VINFO_BBS (res) = bbs;
 857   LOOP_VINFO_NITERS (res) = NULL;
 858   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 859   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 860   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 861   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
 862   LOOP_VINFO_VECT_FACTOR (res) = 0;
 863   LOOP_VINFO_LOOP_NEST (res).create (3);
 864   LOOP_VINFO_DATAREFS (res).create (10);
 865   LOOP_VINFO_DDRS (res).create (10 * 10);
 866   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 867   LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
 868              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 869   LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
 870              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 871   LOOP_VINFO_GROUPED_STORES (res).create (10);
 872   LOOP_VINFO_REDUCTIONS (res).create (10);
 873   LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
 874   LOOP_VINFO_SLP_INSTANCES (res).create (10);
 875   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 876   LOOP_VINFO_PEELING_HTAB (res) = NULL;
 877   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
 878   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
 879   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 880
 881   return res;
 882 }
 883
 884
 885 /* Function destroy_loop_vec_info.
 886
 887    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
 888    stmts in the loop.  */
 889
 890 void
 891 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 892 {
 893   struct loop *loop;
 894   basic_block *bbs;
 895   int nbbs;
 896   gimple_stmt_iterator si;
 897   int j;
 898   vec<slp_instance> slp_instances;
 899   slp_instance instance;
 900   bool swapped;
 901
 902   if (!loop_vinfo)
 903     return;
 904
 905   loop = LOOP_VINFO_LOOP (loop_vinfo);
 906
 907   bbs = LOOP_VINFO_BBS (loop_vinfo);
 908   nbbs = clean_stmts ? loop->num_nodes : 0;
 909   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
 910
 911   for (j = 0; j < nbbs; j++)
 912     {
 913       basic_block bb = bbs[j];
 914       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 915         free_stmt_vec_info (gsi_stmt (si));
 916
 917       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
 918         {
 919           gimple stmt = gsi_stmt (si);
 920
 921           /* We may have broken canonical form by moving a constant
 922              into RHS1 of a commutative op.  Fix such occurrences.  */
 923           if (swapped && is_gimple_assign (stmt))
 924             {
 925               enum tree_code code = gimple_assign_rhs_code (stmt);
 926
 927               if ((code == PLUS_EXPR
 928                    || code == POINTER_PLUS_EXPR
 929                    || code == MULT_EXPR)
 930                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
 931                 swap_tree_operands (stmt,
 932                                     gimple_assign_rhs1_ptr (stmt),
 933                                     gimple_assign_rhs2_ptr (stmt));
 934             }
 935
 936           /* Free stmt_vec_info.  */
 937           free_stmt_vec_info (stmt);
 938           gsi_next (&si);
 939         }
 940     }
 941
 942   free (LOOP_VINFO_BBS (loop_vinfo));
 943   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
 944   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
 945   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
 946   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
 947   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
 948   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
 949   FOR_EACH_VEC_ELT (slp_instances, j, instance)
 950     vect_free_slp_instance (instance);
 951
 952   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
 953   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
 954   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
 955   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
 956
 957   if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
 958     htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
 959
 960   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
 961
 962   free (loop_vinfo);
 963   loop->aux = NULL;
 964 }
 965
 966
 967 /* Function vect_analyze_loop_1.
 968
 969    Apply a set of analyses on LOOP, and create a loop_vec_info struct
 970    for it. The different analyses will record information in the
 971    loop_vec_info struct.  This is a subset of the analyses applied in
 972    vect_analyze_loop, to be applied on an inner-loop nested in the loop
 973    that is now considered for (outer-loop) vectorization.  */
 974
 975 static loop_vec_info
 976 vect_analyze_loop_1 (struct loop *loop)
 977 {
 978   loop_vec_info loop_vinfo;
 979
 980   if (dump_enabled_p ())
 981     dump_printf_loc (MSG_NOTE, vect_location,
 982                      "===== analyze_loop_nest_1 =====");
 983
 984   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
 985
 986   loop_vinfo = vect_analyze_loop_form (loop);
 987   if (!loop_vinfo)
 988     {
 989       if (dump_enabled_p ())
 990         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 991                          "bad inner-loop form.");
 992       return NULL;
 993     }
 994
 995   return loop_vinfo;
 996 }
 997
 998
 999 /* Function vect_analyze_loop_form.
1000
1001    Verify that certain CFG restrictions hold, including:
1002    - the loop has a pre-header
1003    - the loop has a single entry and exit
1004    - the loop exit condition is simple enough, and the number of iterations
1005      can be analyzed (a countable loop).  */
1006
1007 loop_vec_info
1008 vect_analyze_loop_form (struct loop *loop)
1009 {
1010   loop_vec_info loop_vinfo;
1011   gimple loop_cond;
1012   tree number_of_iterations = NULL;
1013   loop_vec_info inner_loop_vinfo = NULL;
1014
1015   if (dump_enabled_p ())
1016     dump_printf_loc (MSG_NOTE, vect_location,
1017                      "=== vect_analyze_loop_form ===");
1018
1019   /* Different restrictions apply when we are considering an inner-most loop,
1020      vs. an outer (nested) loop.
1021      (FORNOW. May want to relax some of these restrictions in the future).  */
1022
1023   if (!loop->inner)
1024     {
1025       /* Inner-most loop.  We currently require that the number of BBs is
1026          exactly 2 (the header and latch).  Vectorizable inner-most loops
1027          look like this:
1028
1029                         (pre-header)
1030                            |
1031                           header <--------+
1032                            | |            |
1033                            | +--> latch --+
1034                            |
1035                         (exit-bb)  */
1036
1037       if (loop->num_nodes != 2)
1038         {
1039           if (dump_enabled_p ())
1040             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1041                              "not vectorized: control flow in loop.");
1042           return NULL;
1043         }
1044
1045       if (empty_block_p (loop->header))
1046     {
1047           if (dump_enabled_p ())
1048             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1049                              "not vectorized: empty loop.");
1050       return NULL;
1051     }
1052     }
1053   else
1054     {
1055       struct loop *innerloop = loop->inner;
1056       edge entryedge;
1057
1058       /* Nested loop. We currently require that the loop is doubly-nested,
1059          contains a single inner loop, and the number of BBs is exactly 5.
1060          Vectorizable outer-loops look like this:
1061
1062                         (pre-header)
1063                            |
1064                           header <---+
1065                            |         |
1066                           inner-loop |
1067                            |         |
1068                           tail ------+
1069                            |
1070                         (exit-bb)
1071
1072          The inner-loop has the properties expected of inner-most loops
1073          as described above.  */
1074
1075       if ((loop->inner)->inner || (loop->inner)->next)
1076         {
1077           if (dump_enabled_p ())
1078             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1079                              "not vectorized: multiple nested loops.");
1080           return NULL;
1081         }
1082
1083       /* Analyze the inner-loop.  */
1084       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1085       if (!inner_loop_vinfo)
1086         {
1087           if (dump_enabled_p ())
1088             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1089                              "not vectorized: Bad inner loop.");
1090           return NULL;
1091         }
1092
1093       if (!expr_invariant_in_loop_p (loop,
1094                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1095         {
1096           if (dump_enabled_p ())
1097             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1098                              "not vectorized: inner-loop count not invariant.");
1099           destroy_loop_vec_info (inner_loop_vinfo, true);
1100           return NULL;
1101         }
1102
1103       if (loop->num_nodes != 5)
1104         {
1105           if (dump_enabled_p ())
1106             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1107                              "not vectorized: control flow in loop.");
1108           destroy_loop_vec_info (inner_loop_vinfo, true);
1109           return NULL;
1110         }
1111
1112       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1113       entryedge = EDGE_PRED (innerloop->header, 0);
1114       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1115         entryedge = EDGE_PRED (innerloop->header, 1);
1116
1117       if (entryedge->src != loop->header
1118           || !single_exit (innerloop)
1119           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1120         {
1121           if (dump_enabled_p ())
1122             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1123                              "not vectorized: unsupported outerloop form.");
1124           destroy_loop_vec_info (inner_loop_vinfo, true);
1125           return NULL;
1126         }
1127
1128       if (dump_enabled_p ())
1129         dump_printf_loc (MSG_NOTE, vect_location,
1130                          "Considering outer-loop vectorization.");
1131     }
1132
1133   if (!single_exit (loop)
1134       || EDGE_COUNT (loop->header->preds) != 2)
1135     {
1136       if (dump_enabled_p ())
1137         {
1138           if (!single_exit (loop))
1139             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1140                              "not vectorized: multiple exits.");
1141           else if (EDGE_COUNT (loop->header->preds) != 2)
1142             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1143                              "not vectorized: too many incoming edges.");
1144         }
1145       if (inner_loop_vinfo)
1146         destroy_loop_vec_info (inner_loop_vinfo, true);
1147       return NULL;
1148     }
1149
1150   /* We assume that the loop exit condition is at the end of the loop. i.e,
1151      that the loop is represented as a do-while (with a proper if-guard
1152      before the loop if needed), where the loop header contains all the
1153      executable statements, and the latch is empty.  */
1154   if (!empty_block_p (loop->latch)
1155       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1156     {
1157       if (dump_enabled_p ())
1158         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1159                          "not vectorized: latch block not empty.");
1160       if (inner_loop_vinfo)
1161         destroy_loop_vec_info (inner_loop_vinfo, true);
1162       return NULL;
1163     }
1164
1165   /* Make sure there exists a single-predecessor exit bb:  */
1166   if (!single_pred_p (single_exit (loop)->dest))
1167     {
1168       edge e = single_exit (loop);
1169       if (!(e->flags & EDGE_ABNORMAL))
1170         {
1171           split_loop_exit_edge (e);
1172           if (dump_enabled_p ())
1173             dump_printf (MSG_NOTE, "split exit edge.");
1174         }
1175       else
1176         {
1177           if (dump_enabled_p ())
1178             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1179                              "not vectorized: abnormal loop exit edge.");
1180           if (inner_loop_vinfo)
1181             destroy_loop_vec_info (inner_loop_vinfo, true);
1182           return NULL;
1183         }
1184     }
1185
1186   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1187   if (!loop_cond)
1188     {
1189       if (dump_enabled_p ())
1190         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1191                          "not vectorized: complicated exit condition.");
1192       if (inner_loop_vinfo)
1193         destroy_loop_vec_info (inner_loop_vinfo, true);
1194       return NULL;
1195     }
1196
1197   if (!number_of_iterations)
1198     {
1199       if (dump_enabled_p ())
1200         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1201                          "not vectorized: number of iterations cannot be "
1202                          "computed.");
1203       if (inner_loop_vinfo)
1204         destroy_loop_vec_info (inner_loop_vinfo, true);
1205       return NULL;
1206     }
1207
1208   if (chrec_contains_undetermined (number_of_iterations))
1209     {
1210       if (dump_enabled_p ())
1211             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1212                              "Infinite number of iterations.");
1213       if (inner_loop_vinfo)
1214         destroy_loop_vec_info (inner_loop_vinfo, true);
1215       return NULL;
1216     }
1217
1218   if (!NITERS_KNOWN_P (number_of_iterations))
1219     {
1220       if (dump_enabled_p ())
1221         {
1222           dump_printf_loc (MSG_NOTE, vect_location,
1223                            "Symbolic number of iterations is ");
1224           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1225         }
1226     }
1227   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1228     {
1229       if (dump_enabled_p ())
1230         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1231                          "not vectorized: number of iterations = 0.");
1232       if (inner_loop_vinfo)
1233         destroy_loop_vec_info (inner_loop_vinfo, true);
1234       return NULL;
1235     }
1236
1237   loop_vinfo = new_loop_vec_info (loop);
1238   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1239   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1240
1241   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1242
1243   /* CHECKME: May want to keep it around it in the future.  */
1244   if (inner_loop_vinfo)
1245     destroy_loop_vec_info (inner_loop_vinfo, false);
1246
1247   gcc_assert (!loop->aux);
1248   loop->aux = loop_vinfo;
1249   return loop_vinfo;
1250 }
1251
1252
1253 /* Function vect_analyze_loop_operations.
1254
1255    Scan the loop stmts and make sure they are all vectorizable.  */
1256
1257 static bool
1258 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1259 {
1260   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1261   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1262   int nbbs = loop->num_nodes;
1263   gimple_stmt_iterator si;
1264   unsigned int vectorization_factor = 0;
1265   int i;
1266   gimple phi;
1267   stmt_vec_info stmt_info;
1268   bool need_to_vectorize = false;
1269   int min_profitable_iters;
1270   int min_scalar_loop_bound;
1271   unsigned int th;
1272   bool only_slp_in_loop = true, ok;
1273   HOST_WIDE_INT max_niter;
1274   HOST_WIDE_INT estimated_niter;
1275   int min_profitable_estimate;
1276
1277   if (dump_enabled_p ())
1278     dump_printf_loc (MSG_NOTE, vect_location,
1279                      "=== vect_analyze_loop_operations ===");
1280
1281   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1282   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1283   if (slp)
1284     {
1285       /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1286          vectorization factor of the loop is the unrolling factor required by
1287          the SLP instances.  If that unrolling factor is 1, we say, that we
1288          perform pure SLP on loop - cross iteration parallelism is not
1289          exploited.  */
1290       for (i = 0; i < nbbs; i++)
1291         {
1292           basic_block bb = bbs[i];
1293           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1294             {
1295               gimple stmt = gsi_stmt (si);
1296               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1297               gcc_assert (stmt_info);
1298               if ((STMT_VINFO_RELEVANT_P (stmt_info)
1299                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1300                   && !PURE_SLP_STMT (stmt_info))
1301                 /* STMT needs both SLP and loop-based vectorization.  */
1302                 only_slp_in_loop = false;
1303             }
1304         }
1305
1306       if (only_slp_in_loop)
1307         vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1308       else
1309         vectorization_factor = least_common_multiple (vectorization_factor,
1310                                 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1311
1312       LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1313       if (dump_enabled_p ())
1314         dump_printf_loc (MSG_NOTE, vect_location,
1315                          "Updating vectorization factor to %d ",
1316                          vectorization_factor);
1317     }
1318
1319   for (i = 0; i < nbbs; i++)
1320     {
1321       basic_block bb = bbs[i];
1322
1323       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1324         {
1325           phi = gsi_stmt (si);
1326           ok = true;
1327
1328           stmt_info = vinfo_for_stmt (phi);
1329           if (dump_enabled_p ())
1330             {
1331               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1332               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1333             }
1334
1335           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1336              (i.e., a phi in the tail of the outer-loop).  */
1337           if (! is_loop_header_bb_p (bb))
1338             {
1339               /* FORNOW: we currently don't support the case that these phis
1340                  are not used in the outerloop (unless it is double reduction,
1341                  i.e., this phi is vect_reduction_def), cause this case
1342                  requires to actually do something here.  */
1343               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1344                    || STMT_VINFO_LIVE_P (stmt_info))
1345                   && STMT_VINFO_DEF_TYPE (stmt_info)
1346                      != vect_double_reduction_def)
1347                 {
1348                   if (dump_enabled_p ())
1349                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1350                                      "Unsupported loop-closed phi in "
1351                                      "outer-loop.");
1352                   return false;
1353                 }
1354
1355               /* If PHI is used in the outer loop, we check that its operand
1356                  is defined in the inner loop.  */
1357               if (STMT_VINFO_RELEVANT_P (stmt_info))
1358                 {
1359                   tree phi_op;
1360                   gimple op_def_stmt;
1361
1362                   if (gimple_phi_num_args (phi) != 1)
1363                     return false;
1364
1365                   phi_op = PHI_ARG_DEF (phi, 0);
1366                   if (TREE_CODE (phi_op) != SSA_NAME)
1367                     return false;
1368
1369                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1370                   if (!op_def_stmt
1371                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1372                       || !vinfo_for_stmt (op_def_stmt))
1373                     return false;
1374
1375                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1376                         != vect_used_in_outer
1377                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1378                            != vect_used_in_outer_by_reduction)
1379                     return false;
1380                 }
1381
1382               continue;
1383             }
1384
1385           gcc_assert (stmt_info);
1386
1387           if (STMT_VINFO_LIVE_P (stmt_info))
1388             {
1389               /* FORNOW: not yet supported.  */
1390               if (dump_enabled_p ())
1391                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392                                  "not vectorized: value used after loop.");
1393               return false;
1394             }
1395
1396           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1397               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1398             {
1399               /* A scalar-dependence cycle that we don't support.  */
1400               if (dump_enabled_p ())
1401                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1402                                  "not vectorized: scalar dependence cycle.");
1403               return false;
1404             }
1405
1406           if (STMT_VINFO_RELEVANT_P (stmt_info))
1407             {
1408               need_to_vectorize = true;
1409               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1410                 ok = vectorizable_induction (phi, NULL, NULL);
1411             }
1412
1413           if (!ok)
1414             {
1415               if (dump_enabled_p ())
1416                 {
1417                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1418                                    "not vectorized: relevant phi not "
1419                                    "supported: ");
1420                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1421                 }
1422               return false;
1423             }
1424         }
1425
1426       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1427         {
1428           gimple stmt = gsi_stmt (si);
1429           if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1430             return false;
1431         }
1432     } /* bbs */
1433
1434   /* All operations in the loop are either irrelevant (deal with loop
1435      control, or dead), or only used outside the loop and can be moved
1436      out of the loop (e.g. invariants, inductions).  The loop can be
1437      optimized away by scalar optimizations.  We're better off not
1438      touching this loop.  */
1439   if (!need_to_vectorize)
1440     {
1441       if (dump_enabled_p ())
1442         dump_printf_loc (MSG_NOTE, vect_location,
1443                          "All the computation can be taken out of the loop.");
1444       if (dump_enabled_p ())
1445         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1446                          "not vectorized: redundant loop. no profit to "
1447                          "vectorize.");
1448       return false;
1449     }
1450
1451   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1452     dump_printf_loc (MSG_NOTE, vect_location,
1453                      "vectorization_factor = %d, niters = "
1454                      HOST_WIDE_INT_PRINT_DEC, vectorization_factor,
1455                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1456
1457   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1458        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1459       || ((max_niter = max_stmt_executions_int (loop)) != -1
1460           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1461     {
1462       if (dump_enabled_p ())
1463         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1464                          "not vectorized: iteration count too small.");
1465       if (dump_enabled_p ())
1466         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1467                          "not vectorized: iteration count smaller than "
1468                          "vectorization factor.");
1469       return false;
1470     }
1471
1472   /* Analyze cost.  Decide if worth while to vectorize.  */
1473
1474   /* Once VF is set, SLP costs should be updated since the number of created
1475      vector stmts depends on VF.  */
1476   vect_update_slp_costs_according_to_vf (loop_vinfo);
1477
1478   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1479                                       &min_profitable_estimate);
1480   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1481
1482   if (min_profitable_iters < 0)
1483     {
1484       if (dump_enabled_p ())
1485         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1486                          "not vectorized: vectorization not profitable.");
1487       if (dump_enabled_p ())
1488         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1489                          "not vectorized: vector version will never be "
1490                          "profitable.");
1491       return false;
1492     }
1493
1494   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1495                             * vectorization_factor) - 1);
1496
1497
1498   /* Use the cost model only if it is more conservative than user specified
1499      threshold.  */
1500
1501   th = (unsigned) min_scalar_loop_bound;
1502   if (min_profitable_iters
1503       && (!min_scalar_loop_bound
1504           || min_profitable_iters > min_scalar_loop_bound))
1505     th = (unsigned) min_profitable_iters;
1506
1507   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1508       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1509     {
1510       if (dump_enabled_p ())
1511         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1512                          "not vectorized: vectorization not profitable.");
1513       if (dump_enabled_p ())
1514         dump_printf_loc (MSG_NOTE, vect_location,
1515                          "not vectorized: iteration count smaller than user "
1516                          "specified loop bound parameter or minimum profitable "
1517                          "iterations (whichever is more conservative).");
1518       return false;
1519     }
1520
1521   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1522       && ((unsigned HOST_WIDE_INT) estimated_niter
1523           <= MAX (th, (unsigned)min_profitable_estimate)))
1524     {
1525       if (dump_enabled_p ())
1526         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1527                          "not vectorized: estimated iteration count too "
1528                          "small.");
1529       if (dump_enabled_p ())
1530         dump_printf_loc (MSG_NOTE, vect_location,
1531                          "not vectorized: estimated iteration count smaller "
1532                          "than specified loop bound parameter or minimum "
1533                          "profitable iterations (whichever is more "
1534                          "conservative).");
1535       return false;
1536     }
1537
1538   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1539       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1540       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1541     {
1542       if (dump_enabled_p ())
1543         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.");
1544       if (!vect_can_advance_ivs_p (loop_vinfo))
1545         {
1546           if (dump_enabled_p ())
1547             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548                              "not vectorized: can't create epilog loop 1.");
1549           return false;
1550         }
1551       if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1552         {
1553           if (dump_enabled_p ())
1554             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1555                              "not vectorized: can't create epilog loop 2.");
1556           return false;
1557         }
1558     }
1559
1560   return true;
1561 }
1562
1563
1564 /* Function vect_analyze_loop_2.
1565
1566    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1567    for it.  The different analyses will record information in the
1568    loop_vec_info struct.  */
1569 static bool
1570 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1571 {
1572   bool ok, slp = false;
1573   int max_vf = MAX_VECTORIZATION_FACTOR;
1574   int min_vf = 2;
1575
1576   /* Find all data references in the loop (which correspond to vdefs/vuses)
1577      and analyze their evolution in the loop.  Also adjust the minimal
1578      vectorization factor according to the loads and stores.
1579
1580      FORNOW: Handle only simple, array references, which
1581      alignment can be forced, and aligned pointer-references.  */
1582
1583   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1584   if (!ok)
1585     {
1586       if (dump_enabled_p ())
1587         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1588                          "bad data references.");
1589       return false;
1590     }
1591
1592   /* Classify all cross-iteration scalar data-flow cycles.
1593      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1594
1595   vect_analyze_scalar_cycles (loop_vinfo);
1596
1597   vect_pattern_recog (loop_vinfo, NULL);
1598
1599   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1600
1601   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1602   if (!ok)
1603     {
1604       if (dump_enabled_p ())
1605         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1606                          "unexpected pattern.");
1607       return false;
1608     }
1609
1610   /* Analyze data dependences between the data-refs in the loop
1611      and adjust the maximum vectorization factor according to
1612      the dependences.
1613      FORNOW: fail at the first data dependence that we encounter.  */
1614
1615   ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
1616   if (!ok
1617       || max_vf < min_vf)
1618     {
1619       if (dump_enabled_p ())
1620             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1621                              "bad data dependence.");
1622       return false;
1623     }
1624
1625   ok = vect_determine_vectorization_factor (loop_vinfo);
1626   if (!ok)
1627     {
1628       if (dump_enabled_p ())
1629         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1630                          "can't determine vectorization factor.");
1631       return false;
1632     }
1633   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1634     {
1635       if (dump_enabled_p ())
1636         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1637                          "bad data dependence.");
1638       return false;
1639     }
1640
1641   /* Analyze the alignment of the data-refs in the loop.
1642      Fail if a data reference is found that cannot be vectorized.  */
1643
1644   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1645   if (!ok)
1646     {
1647       if (dump_enabled_p ())
1648         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1649                          "bad data alignment.");
1650       return false;
1651     }
1652
1653   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1654      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1655
1656   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1657   if (!ok)
1658     {
1659       if (dump_enabled_p ())
1660         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1661                          "bad data access.");
1662       return false;
1663     }
1664
1665   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1666      It is important to call pruning after vect_analyze_data_ref_accesses,
1667      since we use grouping information gathered by interleaving analysis.  */
1668   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1669   if (!ok)
1670     {
1671       if (dump_enabled_p ())
1672         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1673                          "too long list of versioning for alias "
1674                          "run-time tests.");
1675       return false;
1676     }
1677
1678   /* This pass will decide on using loop versioning and/or loop peeling in
1679      order to enhance the alignment of data references in the loop.  */
1680
1681   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1682   if (!ok)
1683     {
1684       if (dump_enabled_p ())
1685         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686                          "bad data alignment.");
1687       return false;
1688     }
1689
1690   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1691   ok = vect_analyze_slp (loop_vinfo, NULL);
1692   if (ok)
1693     {
1694       /* Decide which possible SLP instances to SLP.  */
1695       slp = vect_make_slp_decision (loop_vinfo);
1696
1697       /* Find stmts that need to be both vectorized and SLPed.  */
1698       vect_detect_hybrid_slp (loop_vinfo);
1699     }
1700   else
1701     return false;
1702
1703   /* Scan all the operations in the loop and make sure they are
1704      vectorizable.  */
1705
1706   ok = vect_analyze_loop_operations (loop_vinfo, slp);
1707   if (!ok)
1708     {
1709       if (dump_enabled_p ())
1710         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1711                          "bad operation or unsupported loop bound.");
1712       return false;
1713     }
1714
1715   return true;
1716 }
1717
1718 /* Function vect_analyze_loop.
1719
1720    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1721    for it.  The different analyses will record information in the
1722    loop_vec_info struct.  */
1723 loop_vec_info
1724 vect_analyze_loop (struct loop *loop)
1725 {
1726   loop_vec_info loop_vinfo;
1727   unsigned int vector_sizes;
1728
1729   /* Autodetect first vector size we try.  */
1730   current_vector_size = 0;
1731   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1732
1733   if (dump_enabled_p ())
1734     dump_printf_loc (MSG_NOTE, vect_location,
1735                      "===== analyze_loop_nest =====");
1736
1737   if (loop_outer (loop)
1738       && loop_vec_info_for_loop (loop_outer (loop))
1739       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1740     {
1741       if (dump_enabled_p ())
1742         dump_printf_loc (MSG_NOTE, vect_location,
1743                          "outer-loop already vectorized.");
1744       return NULL;
1745     }
1746
1747   while (1)
1748     {
1749       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
1750       loop_vinfo = vect_analyze_loop_form (loop);
1751       if (!loop_vinfo)
1752         {
1753           if (dump_enabled_p ())
1754             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1755                              "bad loop form.");
1756           return NULL;
1757         }
1758
1759       if (vect_analyze_loop_2 (loop_vinfo))
1760         {
1761           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1762
1763           return loop_vinfo;
1764         }
1765
1766       destroy_loop_vec_info (loop_vinfo, true);
1767
1768       vector_sizes &= ~current_vector_size;
1769       if (vector_sizes == 0
1770           || current_vector_size == 0)
1771         return NULL;
1772
1773       /* Try the next biggest vector size.  */
1774       current_vector_size = 1 << floor_log2 (vector_sizes);
1775       if (dump_enabled_p ())
1776         dump_printf_loc (MSG_NOTE, vect_location,
1777                          "***** Re-trying analysis with "
1778                          "vector size %d\n", current_vector_size);
1779     }
1780 }
1781
1782
1783 /* Function reduction_code_for_scalar_code
1784
1785    Input:
1786    CODE - tree_code of a reduction operations.
1787
1788    Output:
1789    REDUC_CODE - the corresponding tree-code to be used to reduce the
1790       vector of partial results into a single scalar result (which
1791       will also reside in a vector) or ERROR_MARK if the operation is
1792       a supported reduction operation, but does not have such tree-code.
1793
1794    Return FALSE if CODE currently cannot be vectorized as reduction.  */
1795
1796 static bool
1797 reduction_code_for_scalar_code (enum tree_code code,
1798                                 enum tree_code *reduc_code)
1799 {
1800   switch (code)
1801     {
1802       case MAX_EXPR:
1803         *reduc_code = REDUC_MAX_EXPR;
1804         return true;
1805
1806       case MIN_EXPR:
1807         *reduc_code = REDUC_MIN_EXPR;
1808         return true;
1809
1810       case PLUS_EXPR:
1811         *reduc_code = REDUC_PLUS_EXPR;
1812         return true;
1813
1814       case MULT_EXPR:
1815       case MINUS_EXPR:
1816       case BIT_IOR_EXPR:
1817       case BIT_XOR_EXPR:
1818       case BIT_AND_EXPR:
1819         *reduc_code = ERROR_MARK;
1820         return true;
1821
1822       default:
1823        return false;
1824     }
1825 }
1826
1827
1828 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
1829    STMT is printed with a message MSG. */
1830
1831 static void
1832 report_vect_op (int msg_type, gimple stmt, const char *msg)
1833 {
1834   dump_printf_loc (msg_type, vect_location, "%s", msg);
1835   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
1836 }
1837
1838
1839 /* Detect SLP reduction of the form:
1840
1841    #a1 = phi <a5, a0>
1842    a2 = operation (a1)
1843    a3 = operation (a2)
1844    a4 = operation (a3)
1845    a5 = operation (a4)
1846
1847    #a = phi <a5>
1848
1849    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1850    FIRST_STMT is the first reduction stmt in the chain
1851    (a2 = operation (a1)).
1852
1853    Return TRUE if a reduction chain was detected.  */
1854
1855 static bool
1856 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1857 {
1858   struct loop *loop = (gimple_bb (phi))->loop_father;
1859   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1860   enum tree_code code;
1861   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
1862   stmt_vec_info use_stmt_info, current_stmt_info;
1863   tree lhs;
1864   imm_use_iterator imm_iter;
1865   use_operand_p use_p;
1866   int nloop_uses, size = 0, n_out_of_loop_uses;
1867   bool found = false;
1868
1869   if (loop != vect_loop)
1870     return false;
1871
1872   lhs = PHI_RESULT (phi);
1873   code = gimple_assign_rhs_code (first_stmt);
1874   while (1)
1875     {
1876       nloop_uses = 0;
1877       n_out_of_loop_uses = 0;
1878       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1879         {
1880           gimple use_stmt = USE_STMT (use_p);
1881           if (is_gimple_debug (use_stmt))
1882             continue;
1883
1884           use_stmt = USE_STMT (use_p);
1885
1886           /* Check if we got back to the reduction phi.  */
1887           if (use_stmt == phi)
1888             {
1889               loop_use_stmt = use_stmt;
1890               found = true;
1891               break;
1892             }
1893
1894           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1895             {
1896               if (vinfo_for_stmt (use_stmt)
1897                   && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1898                 {
1899                   loop_use_stmt = use_stmt;
1900                   nloop_uses++;
1901                 }
1902             }
1903            else
1904              n_out_of_loop_uses++;
1905
1906            /* There are can be either a single use in the loop or two uses in
1907               phi nodes.  */
1908            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
1909              return false;
1910         }
1911
1912       if (found)
1913         break;
1914
1915       /* We reached a statement with no loop uses.  */
1916       if (nloop_uses == 0)
1917         return false;
1918
1919       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
1920       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
1921         return false;
1922
1923       if (!is_gimple_assign (loop_use_stmt)
1924           || code != gimple_assign_rhs_code (loop_use_stmt)
1925           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
1926         return false;
1927
1928       /* Insert USE_STMT into reduction chain.  */
1929       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
1930       if (current_stmt)
1931         {
1932           current_stmt_info = vinfo_for_stmt (current_stmt);
1933           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
1934           GROUP_FIRST_ELEMENT (use_stmt_info)
1935             = GROUP_FIRST_ELEMENT (current_stmt_info);
1936         }
1937       else
1938         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
1939
1940       lhs = gimple_assign_lhs (loop_use_stmt);
1941       current_stmt = loop_use_stmt;
1942       size++;
1943    }
1944
1945   if (!found || loop_use_stmt != phi || size < 2)
1946     return false;
1947
1948   /* Swap the operands, if needed, to make the reduction operand be the second
1949      operand.  */
1950   lhs = PHI_RESULT (phi);
1951   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
1952   while (next_stmt)
1953     {
1954       if (gimple_assign_rhs2 (next_stmt) == lhs)
1955         {
1956           tree op = gimple_assign_rhs1 (next_stmt);
1957           gimple def_stmt = NULL;
1958
1959           if (TREE_CODE (op) == SSA_NAME)
1960             def_stmt = SSA_NAME_DEF_STMT (op);
1961
1962           /* Check that the other def is either defined in the loop
1963              ("vect_internal_def"), or it's an induction (defined by a
1964              loop-header phi-node).  */
1965           if (def_stmt
1966               && gimple_bb (def_stmt)
1967               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1968               && (is_gimple_assign (def_stmt)
1969                   || is_gimple_call (def_stmt)
1970                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1971                            == vect_induction_def
1972                   || (gimple_code (def_stmt) == GIMPLE_PHI
1973                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
1974                                   == vect_internal_def
1975                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
1976             {
1977               lhs = gimple_assign_lhs (next_stmt);
1978               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1979               continue;
1980             }
1981
1982           return false;
1983         }
1984       else
1985         {
1986           tree op = gimple_assign_rhs2 (next_stmt);
1987           gimple def_stmt = NULL;
1988
1989           if (TREE_CODE (op) == SSA_NAME)
1990             def_stmt = SSA_NAME_DEF_STMT (op);
1991
1992           /* Check that the other def is either defined in the loop
1993             ("vect_internal_def"), or it's an induction (defined by a
1994             loop-header phi-node).  */
1995           if (def_stmt
1996               && gimple_bb (def_stmt)
1997               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1998               && (is_gimple_assign (def_stmt)
1999                   || is_gimple_call (def_stmt)
2000                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2001                               == vect_induction_def
2002                   || (gimple_code (def_stmt) == GIMPLE_PHI
2003                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2004                                   == vect_internal_def
2005                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2006             {
2007               if (dump_enabled_p ())
2008                 {
2009                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2010                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2011                 }
2012
2013               swap_tree_operands (next_stmt,
2014                                   gimple_assign_rhs1_ptr (next_stmt),
2015                                   gimple_assign_rhs2_ptr (next_stmt));
2016               update_stmt (next_stmt);
2017
2018               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2019                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2020             }
2021           else
2022             return false;
2023         }
2024
2025       lhs = gimple_assign_lhs (next_stmt);
2026       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2027     }
2028
2029   /* Save the chain for further analysis in SLP detection.  */
2030   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2031   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2032   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2033
2034   return true;
2035 }
2036
2037
2038 /* Function vect_is_simple_reduction_1
2039
2040    (1) Detect a cross-iteration def-use cycle that represents a simple
2041    reduction computation.  We look for the following pattern:
2042
2043    loop_header:
2044      a1 = phi < a0, a2 >
2045      a3 = ...
2046      a2 = operation (a3, a1)
2047
2048    such that:
2049    1. operation is commutative and associative and it is safe to
2050       change the order of the computation (if CHECK_REDUCTION is true)
2051    2. no uses for a2 in the loop (a2 is used out of the loop)
2052    3. no uses of a1 in the loop besides the reduction operation
2053    4. no uses of a1 outside the loop.
2054
2055    Conditions 1,4 are tested here.
2056    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2057
2058    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2059    nested cycles, if CHECK_REDUCTION is false.
2060
2061    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2062    reductions:
2063
2064      a1 = phi < a0, a2 >
2065      inner loop (def of a3)
2066      a2 = phi < a3 >
2067
2068    If MODIFY is true it tries also to rework the code in-place to enable
2069    detection of more reduction patterns.  For the time being we rewrite
2070    "res -= RHS" into "res += -RHS" when it seems worthwhile.
2071 */
2072
2073 static gimple
2074 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2075                             bool check_reduction, bool *double_reduc,
2076                             bool modify)
2077 {
2078   struct loop *loop = (gimple_bb (phi))->loop_father;
2079   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2080   edge latch_e = loop_latch_edge (loop);
2081   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2082   gimple def_stmt, def1 = NULL, def2 = NULL;
2083   enum tree_code orig_code, code;
2084   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2085   tree type;
2086   int nloop_uses;
2087   tree name;
2088   imm_use_iterator imm_iter;
2089   use_operand_p use_p;
2090   bool phi_def;
2091
2092   *double_reduc = false;
2093
2094   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2095      otherwise, we assume outer loop vectorization.  */
2096   gcc_assert ((check_reduction && loop == vect_loop)
2097               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2098
2099   name = PHI_RESULT (phi);
2100   /* ???  If there are no uses of the PHI result the inner loop reduction
2101      won't be detected as possibly double-reduction by vectorizable_reduction
2102      because that tries to walk the PHI arg from the preheader edge which
2103      can be constant.  See PR60382.  */
2104   if (has_zero_uses (name))
2105     return NULL;
2106   nloop_uses = 0;
2107   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2108     {
2109       gimple use_stmt = USE_STMT (use_p);
2110       if (is_gimple_debug (use_stmt))
2111         continue;
2112
2113       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2114         {
2115           if (dump_enabled_p ())
2116             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2117                              "intermediate value used outside loop.");
2118
2119           return NULL;
2120         }
2121
2122       if (vinfo_for_stmt (use_stmt)
2123           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2124         nloop_uses++;
2125       if (nloop_uses > 1)
2126         {
2127           if (dump_enabled_p ())
2128             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129                              "reduction used in loop.");
2130           return NULL;
2131         }
2132     }
2133
2134   if (TREE_CODE (loop_arg) != SSA_NAME)
2135     {
2136       if (dump_enabled_p ())
2137         {
2138           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2139                            "reduction: not ssa_name: ");
2140           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2141         }
2142       return NULL;
2143     }
2144
2145   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2146   if (!def_stmt)
2147     {
2148       if (dump_enabled_p ())
2149         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150                          "reduction: no def_stmt.");
2151       return NULL;
2152     }
2153
2154   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2155     {
2156       if (dump_enabled_p ())
2157         dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2158       return NULL;
2159     }
2160
2161   if (is_gimple_assign (def_stmt))
2162     {
2163       name = gimple_assign_lhs (def_stmt);
2164       phi_def = false;
2165     }
2166   else
2167     {
2168       name = PHI_RESULT (def_stmt);
2169       phi_def = true;
2170     }
2171
2172   nloop_uses = 0;
2173   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2174     {
2175       gimple use_stmt = USE_STMT (use_p);
2176       if (is_gimple_debug (use_stmt))
2177         continue;
2178       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2179           && vinfo_for_stmt (use_stmt)
2180           && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2181         nloop_uses++;
2182       if (nloop_uses > 1)
2183         {
2184           if (dump_enabled_p ())
2185             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186                              "reduction used in loop.");
2187           return NULL;
2188         }
2189     }
2190
2191   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2192      defined in the inner loop.  */
2193   if (phi_def)
2194     {
2195       op1 = PHI_ARG_DEF (def_stmt, 0);
2196
2197       if (gimple_phi_num_args (def_stmt) != 1
2198           || TREE_CODE (op1) != SSA_NAME)
2199         {
2200           if (dump_enabled_p ())
2201             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202                              "unsupported phi node definition.");
2203
2204           return NULL;
2205         }
2206
2207       def1 = SSA_NAME_DEF_STMT (op1);
2208       if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2209           && loop->inner
2210           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2211           && is_gimple_assign (def1))
2212         {
2213           if (dump_enabled_p ())
2214             report_vect_op (MSG_NOTE, def_stmt,
2215                             "detected double reduction: ");
2216
2217           *double_reduc = true;
2218           return def_stmt;
2219         }
2220
2221       return NULL;
2222     }
2223
2224   code = orig_code = gimple_assign_rhs_code (def_stmt);
2225
2226   /* We can handle "res -= x[i]", which is non-associative by
2227      simply rewriting this into "res += -x[i]".  Avoid changing
2228      gimple instruction for the first simple tests and only do this
2229      if we're allowed to change code at all.  */
2230   if (code == MINUS_EXPR
2231       && modify
2232       && (op1 = gimple_assign_rhs1 (def_stmt))
2233       && TREE_CODE (op1) == SSA_NAME
2234       && SSA_NAME_DEF_STMT (op1) == phi)
2235     code = PLUS_EXPR;
2236
2237   if (check_reduction
2238       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2239     {
2240       if (dump_enabled_p ())
2241         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2242                         "reduction: not commutative/associative: ");
2243       return NULL;
2244     }
2245
2246   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2247     {
2248       if (code != COND_EXPR)
2249         {
2250           if (dump_enabled_p ())
2251             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2252                             "reduction: not binary operation: ");
2253
2254           return NULL;
2255         }
2256
2257       op3 = gimple_assign_rhs1 (def_stmt);
2258       if (COMPARISON_CLASS_P (op3))
2259         {
2260           op4 = TREE_OPERAND (op3, 1);
2261           op3 = TREE_OPERAND (op3, 0);
2262         }
2263
2264       op1 = gimple_assign_rhs2 (def_stmt);
2265       op2 = gimple_assign_rhs3 (def_stmt);
2266
2267       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2268         {
2269           if (dump_enabled_p ())
2270             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2271                             "reduction: uses not ssa_names: ");
2272
2273           return NULL;
2274         }
2275     }
2276   else
2277     {
2278       op1 = gimple_assign_rhs1 (def_stmt);
2279       op2 = gimple_assign_rhs2 (def_stmt);
2280
2281       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2282         {
2283           if (dump_enabled_p ())
2284             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2285                             "reduction: uses not ssa_names: ");
2286
2287           return NULL;
2288         }
2289    }
2290
2291   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2292   if ((TREE_CODE (op1) == SSA_NAME
2293        && !types_compatible_p (type,TREE_TYPE (op1)))
2294       || (TREE_CODE (op2) == SSA_NAME
2295           && !types_compatible_p (type, TREE_TYPE (op2)))
2296       || (op3 && TREE_CODE (op3) == SSA_NAME
2297           && !types_compatible_p (type, TREE_TYPE (op3)))
2298       || (op4 && TREE_CODE (op4) == SSA_NAME
2299           && !types_compatible_p (type, TREE_TYPE (op4))))
2300     {
2301       if (dump_enabled_p ())
2302         {
2303           dump_printf_loc (MSG_NOTE, vect_location,
2304                            "reduction: multiple types: operation type: ");
2305           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2306           dump_printf (MSG_NOTE, ", operands types: ");
2307           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2308                              TREE_TYPE (op1));
2309           dump_printf (MSG_NOTE, ",");
2310           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2311                              TREE_TYPE (op2));
2312           if (op3)
2313             {
2314               dump_printf (MSG_NOTE, ",");
2315               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2316                                  TREE_TYPE (op3));
2317             }
2318
2319           if (op4)
2320             {
2321               dump_printf (MSG_NOTE, ",");
2322               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2323                                  TREE_TYPE (op4));
2324             }
2325         }
2326
2327       return NULL;
2328     }
2329
2330   /* Check that it's ok to change the order of the computation.
2331      Generally, when vectorizing a reduction we change the order of the
2332      computation.  This may change the behavior of the program in some
2333      cases, so we need to check that this is ok.  One exception is when
2334      vectorizing an outer-loop: the inner-loop is executed sequentially,
2335      and therefore vectorizing reductions in the inner-loop during
2336      outer-loop vectorization is safe.  */
2337
2338   /* CHECKME: check for !flag_finite_math_only too?  */
2339   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2340       && check_reduction)
2341     {
2342       /* Changing the order of operations changes the semantics.  */
2343       if (dump_enabled_p ())
2344         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2345                         "reduction: unsafe fp math optimization: ");
2346       return NULL;
2347     }
2348   else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2349            && check_reduction)
2350     {
2351       /* Changing the order of operations changes the semantics.  */
2352       if (dump_enabled_p ())
2353         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2354                         "reduction: unsafe int math optimization: ");
2355       return NULL;
2356     }
2357   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2358     {
2359       /* Changing the order of operations changes the semantics.  */
2360       if (dump_enabled_p ())
2361         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2362                         "reduction: unsafe fixed-point math optimization: ");
2363       return NULL;
2364     }
2365
2366   /* If we detected "res -= x[i]" earlier, rewrite it into
2367      "res += -x[i]" now.  If this turns out to be useless reassoc
2368      will clean it up again.  */
2369   if (orig_code == MINUS_EXPR)
2370     {
2371       tree rhs = gimple_assign_rhs2 (def_stmt);
2372       tree negrhs = make_ssa_name (TREE_TYPE (rhs), NULL);
2373       gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
2374                                                          rhs, NULL);
2375       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2376       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2377                                                           loop_info, NULL));
2378       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2379       gimple_assign_set_rhs2 (def_stmt, negrhs);
2380       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2381       update_stmt (def_stmt);
2382     }
2383
2384   /* Reduction is safe. We're dealing with one of the following:
2385      1) integer arithmetic and no trapv
2386      2) floating point arithmetic, and special flags permit this optimization
2387      3) nested cycle (i.e., outer loop vectorization).  */
2388   if (TREE_CODE (op1) == SSA_NAME)
2389     def1 = SSA_NAME_DEF_STMT (op1);
2390
2391   if (TREE_CODE (op2) == SSA_NAME)
2392     def2 = SSA_NAME_DEF_STMT (op2);
2393
2394   if (code != COND_EXPR
2395       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2396     {
2397       if (dump_enabled_p ())
2398         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2399       return NULL;
2400     }
2401
2402   /* Check that one def is the reduction def, defined by PHI,
2403      the other def is either defined in the loop ("vect_internal_def"),
2404      or it's an induction (defined by a loop-header phi-node).  */
2405
2406   if (def2 && def2 == phi
2407       && (code == COND_EXPR
2408           || !def1 || gimple_nop_p (def1)
2409           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2410               && (is_gimple_assign (def1)
2411                   || is_gimple_call (def1)
2412                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2413                       == vect_induction_def
2414                   || (gimple_code (def1) == GIMPLE_PHI
2415                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2416                           == vect_internal_def
2417                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2418     {
2419       if (dump_enabled_p ())
2420         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2421       return def_stmt;
2422     }
2423
2424   if (def1 && def1 == phi
2425       && (code == COND_EXPR
2426           || !def2 || gimple_nop_p (def2)
2427           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2428               && (is_gimple_assign (def2)
2429                   || is_gimple_call (def2)
2430                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2431                       == vect_induction_def
2432                   || (gimple_code (def2) == GIMPLE_PHI
2433                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2434                           == vect_internal_def
2435                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2436     {
2437       if (check_reduction)
2438         {
2439           /* Swap operands (just for simplicity - so that the rest of the code
2440              can assume that the reduction variable is always the last (second)
2441              argument).  */
2442           if (dump_enabled_p ())
2443             report_vect_op (MSG_NOTE, def_stmt,
2444                             "detected reduction: need to swap operands: ");
2445
2446           swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2447                               gimple_assign_rhs2_ptr (def_stmt));
2448
2449           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2450             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2451         }
2452       else
2453         {
2454           if (dump_enabled_p ())
2455             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2456         }
2457
2458       return def_stmt;
2459     }
2460
2461   /* Try to find SLP reduction chain.  */
2462   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2463     {
2464       if (dump_enabled_p ())
2465         report_vect_op (MSG_NOTE, def_stmt,
2466                         "reduction: detected reduction chain: ");
2467
2468       return def_stmt;
2469     }
2470
2471   if (dump_enabled_p ())
2472     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2473                     "reduction: unknown pattern: ");
2474
2475   return NULL;
2476 }
2477
2478 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2479    in-place.  Arguments as there.  */
2480
2481 static gimple
2482 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2483                           bool check_reduction, bool *double_reduc)
2484 {
2485   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2486                                      double_reduc, false);
2487 }
2488
2489 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2490    in-place if it enables detection of more reductions.  Arguments
2491    as there.  */
2492
2493 gimple
2494 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2495                           bool check_reduction, bool *double_reduc)
2496 {
2497   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2498                                      double_reduc, true);
2499 }
2500
2501 /* Calculate the cost of one scalar iteration of the loop.  */
2502 int
2503 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
2504 {
2505   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2506   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2507   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2508   int innerloop_iters, i, stmt_cost;
2509
2510   /* Count statements in scalar loop.  Using this as scalar cost for a single
2511      iteration for now.
2512
2513      TODO: Add outer loop support.
2514
2515      TODO: Consider assigning different costs to different scalar
2516      statements.  */
2517
2518   /* FORNOW.  */
2519   innerloop_iters = 1;
2520   if (loop->inner)
2521     innerloop_iters = 50; /* FIXME */
2522
2523   for (i = 0; i < nbbs; i++)
2524     {
2525       gimple_stmt_iterator si;
2526       basic_block bb = bbs[i];
2527
2528       if (bb->loop_father == loop->inner)
2529         factor = innerloop_iters;
2530       else
2531         factor = 1;
2532
2533       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2534         {
2535           gimple stmt = gsi_stmt (si);
2536           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2537
2538           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2539             continue;
2540
2541           /* Skip stmts that are not vectorized inside the loop.  */
2542           if (stmt_info
2543               && !STMT_VINFO_RELEVANT_P (stmt_info)
2544               && (!STMT_VINFO_LIVE_P (stmt_info)
2545                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2546               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
2547             continue;
2548
2549           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2550             {
2551               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2552                stmt_cost = vect_get_stmt_cost (scalar_load);
2553              else
2554                stmt_cost = vect_get_stmt_cost (scalar_store);
2555             }
2556           else
2557             stmt_cost = vect_get_stmt_cost (scalar_stmt);
2558
2559           scalar_single_iter_cost += stmt_cost * factor;
2560         }
2561     }
2562   return scalar_single_iter_cost;
2563 }
2564
2565 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2566 int
2567 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2568                              int *peel_iters_epilogue,
2569                              int scalar_single_iter_cost,
2570                              stmt_vector_for_cost *prologue_cost_vec,
2571                              stmt_vector_for_cost *epilogue_cost_vec)
2572 {
2573   int retval = 0;
2574   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2575
2576   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2577     {
2578       *peel_iters_epilogue = vf/2;
2579       if (dump_enabled_p ())
2580         dump_printf_loc (MSG_NOTE, vect_location,
2581                          "cost model: epilogue peel iters set to vf/2 "
2582                          "because loop iterations are unknown .");
2583
2584       /* If peeled iterations are known but number of scalar loop
2585          iterations are unknown, count a taken branch per peeled loop.  */
2586       retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2587                                  NULL, 0, vect_prologue);
2588     }
2589   else
2590     {
2591       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2592       peel_iters_prologue = niters < peel_iters_prologue ?
2593                             niters : peel_iters_prologue;
2594       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2595       /* If we need to peel for gaps, but no peeling is required, we have to
2596          peel VF iterations.  */
2597       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2598         *peel_iters_epilogue = vf;
2599     }
2600
2601   if (peel_iters_prologue)
2602     retval += record_stmt_cost (prologue_cost_vec,
2603                                 peel_iters_prologue * scalar_single_iter_cost,
2604                                 scalar_stmt, NULL, 0, vect_prologue);
2605   if (*peel_iters_epilogue)
2606     retval += record_stmt_cost (epilogue_cost_vec,
2607                                 *peel_iters_epilogue * scalar_single_iter_cost,
2608                                 scalar_stmt, NULL, 0, vect_epilogue);
2609   return retval;
2610 }
2611
2612 /* Function vect_estimate_min_profitable_iters
2613
2614    Return the number of iterations required for the vector version of the
2615    loop to be profitable relative to the cost of the scalar version of the
2616    loop.  */
2617
2618 static void
2619 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2620                                     int *ret_min_profitable_niters,
2621                                     int *ret_min_profitable_estimate)
2622 {
2623   int min_profitable_iters;
2624   int min_profitable_estimate;
2625   int peel_iters_prologue;
2626   int peel_iters_epilogue;
2627   unsigned vec_inside_cost = 0;
2628   int vec_outside_cost = 0;
2629   unsigned vec_prologue_cost = 0;
2630   unsigned vec_epilogue_cost = 0;
2631   int scalar_single_iter_cost = 0;
2632   int scalar_outside_cost = 0;
2633   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2634   int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2635   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2636
2637   /* Cost model disabled.  */
2638   if (!flag_vect_cost_model)
2639     {
2640       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.");
2641       *ret_min_profitable_niters = 0;
2642       *ret_min_profitable_estimate = 0;
2643       return;
2644     }
2645
2646   /* Requires loop versioning tests to handle misalignment.  */
2647   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2648     {
2649       /*  FIXME: Make cost depend on complexity of individual check.  */
2650       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2651       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2652                             vect_prologue);
2653       dump_printf (MSG_NOTE,
2654                    "cost model: Adding cost of checks for loop "
2655                    "versioning to treat misalignment.\n");
2656     }
2657
2658   /* Requires loop versioning with alias checks.  */
2659   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2660     {
2661       /*  FIXME: Make cost depend on complexity of individual check.  */
2662       unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2663       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2664                             vect_prologue);
2665       dump_printf (MSG_NOTE,
2666                    "cost model: Adding cost of checks for loop "
2667                    "versioning aliasing.\n");
2668     }
2669
2670   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2671       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2672     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2673                           vect_prologue);
2674
2675   /* Count statements in scalar loop.  Using this as scalar cost for a single
2676      iteration for now.
2677
2678      TODO: Add outer loop support.
2679
2680      TODO: Consider assigning different costs to different scalar
2681      statements.  */
2682
2683   scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
2684
2685   /* Add additional cost for the peeled instructions in prologue and epilogue
2686      loop.
2687
2688      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2689      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2690
2691      TODO: Build an expression that represents peel_iters for prologue and
2692      epilogue to be used in a run-time test.  */
2693
2694   if (npeel  < 0)
2695     {
2696       peel_iters_prologue = vf/2;
2697       dump_printf (MSG_NOTE, "cost model: "
2698                    "prologue peel iters set to vf/2.");
2699
2700       /* If peeling for alignment is unknown, loop bound of main loop becomes
2701          unknown.  */
2702       peel_iters_epilogue = vf/2;
2703       dump_printf (MSG_NOTE, "cost model: "
2704                    "epilogue peel iters set to vf/2 because "
2705                    "peeling for alignment is unknown.");
2706
2707       /* If peeled iterations are unknown, count a taken branch and a not taken
2708          branch per peeled loop. Even if scalar loop iterations are known,
2709          vector iterations are not known since peeled prologue iterations are
2710          not known. Hence guards remain the same.  */
2711       (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2712                             NULL, 0, vect_prologue);
2713       (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2714                             NULL, 0, vect_prologue);
2715       /* FORNOW: Don't attempt to pass individual scalar instructions to
2716          the model; just assume linear cost for scalar iterations.  */
2717       (void) add_stmt_cost (target_cost_data,
2718                             peel_iters_prologue * scalar_single_iter_cost,
2719                             scalar_stmt, NULL, 0, vect_prologue);
2720       (void) add_stmt_cost (target_cost_data,
2721                             peel_iters_epilogue * scalar_single_iter_cost,
2722                             scalar_stmt, NULL, 0, vect_epilogue);
2723     }
2724   else
2725     {
2726       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2727       stmt_info_for_cost *si;
2728       int j;
2729       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2730
2731       prologue_cost_vec.create (2);
2732       epilogue_cost_vec.create (2);
2733       peel_iters_prologue = npeel;
2734
2735       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2736                                           &peel_iters_epilogue,
2737                                           scalar_single_iter_cost,
2738                                           &prologue_cost_vec,
2739                                           &epilogue_cost_vec);
2740
2741       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2742         {
2743           struct _stmt_vec_info *stmt_info
2744             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2745           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2746                                 si->misalign, vect_prologue);
2747         }
2748
2749       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2750         {
2751           struct _stmt_vec_info *stmt_info
2752             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2753           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2754                                 si->misalign, vect_epilogue);
2755         }
2756
2757       prologue_cost_vec.release ();
2758       epilogue_cost_vec.release ();
2759     }
2760
2761   /* FORNOW: The scalar outside cost is incremented in one of the
2762      following ways:
2763
2764      1. The vectorizer checks for alignment and aliasing and generates
2765      a condition that allows dynamic vectorization.  A cost model
2766      check is ANDED with the versioning condition.  Hence scalar code
2767      path now has the added cost of the versioning check.
2768
2769        if (cost > th & versioning_check)
2770          jmp to vector code
2771
2772      Hence run-time scalar is incremented by not-taken branch cost.
2773
2774      2. The vectorizer then checks if a prologue is required.  If the
2775      cost model check was not done before during versioning, it has to
2776      be done before the prologue check.
2777
2778        if (cost <= th)
2779          prologue = scalar_iters
2780        if (prologue == 0)
2781          jmp to vector code
2782        else
2783          execute prologue
2784        if (prologue == num_iters)
2785          go to exit
2786
2787      Hence the run-time scalar cost is incremented by a taken branch,
2788      plus a not-taken branch, plus a taken branch cost.
2789
2790      3. The vectorizer then checks if an epilogue is required.  If the
2791      cost model check was not done before during prologue check, it
2792      has to be done with the epilogue check.
2793
2794        if (prologue == 0)
2795          jmp to vector code
2796        else
2797          execute prologue
2798        if (prologue == num_iters)
2799          go to exit
2800        vector code:
2801          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2802            jmp to epilogue
2803
2804      Hence the run-time scalar cost should be incremented by 2 taken
2805      branches.
2806
2807      TODO: The back end may reorder the BBS's differently and reverse
2808      conditions/branch directions.  Change the estimates below to
2809      something more reasonable.  */
2810
2811   /* If the number of iterations is known and we do not do versioning, we can
2812      decide whether to vectorize at compile time.  Hence the scalar version
2813      do not carry cost model guard costs.  */
2814   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2815       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2816       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2817     {
2818       /* Cost model check occurs at versioning.  */
2819       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2820           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2821         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
2822       else
2823         {
2824           /* Cost model check occurs at prologue generation.  */
2825           if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2826             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2827               + vect_get_stmt_cost (cond_branch_not_taken);
2828           /* Cost model check occurs at epilogue generation.  */
2829           else
2830             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
2831         }
2832     }
2833
2834   /* Complete the target-specific cost calculations.  */
2835   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2836                &vec_inside_cost, &vec_epilogue_cost);
2837
2838   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
2839
2840   /* Calculate number of iterations required to make the vector version
2841      profitable, relative to the loop bodies only.  The following condition
2842      must hold true:
2843      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2844      where
2845      SIC = scalar iteration cost, VIC = vector iteration cost,
2846      VOC = vector outside cost, VF = vectorization factor,
2847      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2848      SOC = scalar outside cost for run time cost model check.  */
2849
2850   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
2851     {
2852       if (vec_outside_cost <= 0)
2853         min_profitable_iters = 1;
2854       else
2855         {
2856           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2857                                   - vec_inside_cost * peel_iters_prologue
2858                                   - vec_inside_cost * peel_iters_epilogue)
2859                                  / ((scalar_single_iter_cost * vf)
2860                                     - vec_inside_cost);
2861
2862           if ((scalar_single_iter_cost * vf * min_profitable_iters)
2863               <= (((int) vec_inside_cost * min_profitable_iters)
2864                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
2865             min_profitable_iters++;
2866         }
2867     }
2868   /* vector version will never be profitable.  */
2869   else
2870     {
2871       if (dump_enabled_p ())
2872         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2873                          "cost model: the vector iteration cost = %d "
2874                          "divided by the scalar iteration cost = %d "
2875                          "is greater or equal to the vectorization factor = %d.",
2876                          vec_inside_cost, scalar_single_iter_cost, vf);
2877       *ret_min_profitable_niters = -1;
2878       *ret_min_profitable_estimate = -1;
2879       return;
2880     }
2881
2882   if (dump_enabled_p ())
2883     {
2884       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2885       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
2886                    vec_inside_cost);
2887       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
2888                    vec_prologue_cost);
2889       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
2890                    vec_epilogue_cost);
2891       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
2892                    scalar_single_iter_cost);
2893       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
2894                    scalar_outside_cost);
2895       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
2896                    vec_outside_cost);
2897       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
2898                    peel_iters_prologue);
2899       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
2900                    peel_iters_epilogue);
2901       dump_printf (MSG_NOTE,
2902                    "  Calculated minimum iters for profitability: %d\n",
2903                    min_profitable_iters);
2904     }
2905
2906   min_profitable_iters =
2907         min_profitable_iters < vf ? vf : min_profitable_iters;
2908
2909   /* Because the condition we create is:
2910      if (niters <= min_profitable_iters)
2911        then skip the vectorized loop.  */
2912   min_profitable_iters--;
2913
2914   if (dump_enabled_p ())
2915     dump_printf_loc (MSG_NOTE, vect_location,
2916                      "  Runtime profitability threshold = %d\n", min_profitable_iters);
2917
2918   *ret_min_profitable_niters = min_profitable_iters;
2919
2920   /* Calculate number of iterations required to make the vector version
2921      profitable, relative to the loop bodies only.
2922
2923      Non-vectorized variant is SIC * niters and it must win over vector
2924      variant on the expected loop trip count.  The following condition must hold true:
2925      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
2926
2927   if (vec_outside_cost <= 0)
2928     min_profitable_estimate = 1;
2929   else
2930     {
2931       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
2932                                  - vec_inside_cost * peel_iters_prologue
2933                                  - vec_inside_cost * peel_iters_epilogue)
2934                                  / ((scalar_single_iter_cost * vf)
2935                                    - vec_inside_cost);
2936     }
2937   min_profitable_estimate --;
2938   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
2939   if (dump_enabled_p ())
2940     dump_printf_loc (MSG_NOTE, vect_location,
2941                      "  Static estimate profitability threshold = %d\n",
2942                       min_profitable_iters);
2943
2944   *ret_min_profitable_estimate = min_profitable_estimate;
2945 }
2946
2947
2948 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2949    functions. Design better to avoid maintenance issues.  */
2950
2951 /* Function vect_model_reduction_cost.
2952
2953    Models cost for a reduction operation, including the vector ops
2954    generated within the strip-mine loop, the initial definition before
2955    the loop, and the epilogue code that must be generated.  */
2956
2957 static bool
2958 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2959                            int ncopies)
2960 {
2961   int prologue_cost = 0, epilogue_cost = 0;
2962   enum tree_code code;
2963   optab optab;
2964   tree vectype;
2965   gimple stmt, orig_stmt;
2966   tree reduction_op;
2967   enum machine_mode mode;
2968   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2969   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2970   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2971
2972   /* Cost of reduction op inside loop.  */
2973   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
2974                                         stmt_info, 0, vect_body);
2975   stmt = STMT_VINFO_STMT (stmt_info);
2976
2977   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2978     {
2979     case GIMPLE_SINGLE_RHS:
2980       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2981       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2982       break;
2983     case GIMPLE_UNARY_RHS:
2984       reduction_op = gimple_assign_rhs1 (stmt);
2985       break;
2986     case GIMPLE_BINARY_RHS:
2987       reduction_op = gimple_assign_rhs2 (stmt);
2988       break;
2989     case GIMPLE_TERNARY_RHS:
2990       reduction_op = gimple_assign_rhs3 (stmt);
2991       break;
2992     default:
2993       gcc_unreachable ();
2994     }
2995
2996   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2997   if (!vectype)
2998     {
2999       if (dump_enabled_p ())
3000         {
3001           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3002                            "unsupported data-type ");
3003           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3004                              TREE_TYPE (reduction_op));
3005         }
3006       return false;
3007    }
3008
3009   mode = TYPE_MODE (vectype);
3010   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3011
3012   if (!orig_stmt)
3013     orig_stmt = STMT_VINFO_STMT (stmt_info);
3014
3015   code = gimple_assign_rhs_code (orig_stmt);
3016
3017   /* Add in cost for initial definition.  */
3018   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3019                                   stmt_info, 0, vect_prologue);
3020
3021   /* Determine cost of epilogue code.
3022
3023      We have a reduction operator that will reduce the vector in one statement.
3024      Also requires scalar extract.  */
3025
3026   if (!nested_in_vect_loop_p (loop, orig_stmt))
3027     {
3028       if (reduc_code != ERROR_MARK)
3029         {
3030           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3031                                           stmt_info, 0, vect_epilogue);
3032           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3033                                           stmt_info, 0, vect_epilogue);
3034         }
3035       else
3036         {
3037           int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3038           tree bitsize =
3039             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3040           int element_bitsize = tree_low_cst (bitsize, 1);
3041           int nelements = vec_size_in_bits / element_bitsize;
3042
3043           optab = optab_for_tree_code (code, vectype, optab_default);
3044
3045           /* We have a whole vector shift available.  */
3046           if (VECTOR_MODE_P (mode)
3047               && optab_handler (optab, mode) != CODE_FOR_nothing
3048               && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3049             {
3050               /* Final reduction via vector shifts and the reduction operator.
3051                  Also requires scalar extract.  */
3052               epilogue_cost += add_stmt_cost (target_cost_data,
3053                                               exact_log2 (nelements) * 2,
3054                                               vector_stmt, stmt_info, 0,
3055                                               vect_epilogue);
3056               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3057                                               vec_to_scalar, stmt_info, 0,
3058                                               vect_epilogue);
3059             }
3060           else
3061             /* Use extracts and reduction op for final reduction.  For N
3062                elements, we have N extracts and N-1 reduction ops.  */
3063             epilogue_cost += add_stmt_cost (target_cost_data,
3064                                             nelements + nelements - 1,
3065                                             vector_stmt, stmt_info, 0,
3066                                             vect_epilogue);
3067         }
3068     }
3069
3070   if (dump_enabled_p ())
3071     dump_printf (MSG_NOTE,
3072                  "vect_model_reduction_cost: inside_cost = %d, "
3073                  "prologue_cost = %d, epilogue_cost = %d .", inside_cost,
3074                  prologue_cost, epilogue_cost);
3075
3076   return true;
3077 }
3078
3079
3080 /* Function vect_model_induction_cost.
3081
3082    Models cost for induction operations.  */
3083
3084 static void
3085 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3086 {
3087   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3088   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3089   unsigned inside_cost, prologue_cost;
3090
3091   /* loop cost for vec_loop.  */
3092   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3093                                stmt_info, 0, vect_body);
3094
3095   /* prologue cost for vec_init and vec_step.  */
3096   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3097                                  stmt_info, 0, vect_prologue);
3098
3099   if (dump_enabled_p ())
3100     dump_printf_loc (MSG_NOTE, vect_location,
3101                      "vect_model_induction_cost: inside_cost = %d, "
3102                      "prologue_cost = %d .", inside_cost, prologue_cost);
3103 }
3104
3105
3106 /* Function get_initial_def_for_induction
3107
3108    Input:
3109    STMT - a stmt that performs an induction operation in the loop.
3110    IV_PHI - the initial value of the induction variable
3111
3112    Output:
3113    Return a vector variable, initialized with the first VF values of
3114    the induction variable.  E.g., for an iv with IV_PHI='X' and
3115    evolution S, for a vector of 4 units, we want to return:
3116    [X, X + S, X + 2*S, X + 3*S].  */
3117
3118 static tree
3119 get_initial_def_for_induction (gimple iv_phi)
3120 {
3121   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3122   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3123   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3124   tree vectype;
3125   int nunits;
3126   edge pe = loop_preheader_edge (loop);
3127   struct loop *iv_loop;
3128   basic_block new_bb;
3129   tree new_vec, vec_init, vec_step, t;
3130   tree new_var;
3131   tree new_name;
3132   gimple init_stmt, induction_phi, new_stmt;
3133   tree induc_def, vec_def, vec_dest;
3134   tree init_expr, step_expr;
3135   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3136   int i;
3137   int ncopies;
3138   tree expr;
3139   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3140   bool nested_in_vect_loop = false;
3141   gimple_seq stmts = NULL;
3142   imm_use_iterator imm_iter;
3143   use_operand_p use_p;
3144   gimple exit_phi;
3145   edge latch_e;
3146   tree loop_arg;
3147   gimple_stmt_iterator si;
3148   basic_block bb = gimple_bb (iv_phi);
3149   tree stepvectype;
3150   tree resvectype;
3151
3152   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3153   if (nested_in_vect_loop_p (loop, iv_phi))
3154     {
3155       nested_in_vect_loop = true;
3156       iv_loop = loop->inner;
3157     }
3158   else
3159     iv_loop = loop;
3160   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3161
3162   latch_e = loop_latch_edge (iv_loop);
3163   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3164
3165   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3166   gcc_assert (step_expr != NULL_TREE);
3167
3168   pe = loop_preheader_edge (iv_loop);
3169   init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3170                                      loop_preheader_edge (iv_loop));
3171
3172   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3173   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3174   gcc_assert (vectype);
3175   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3176   ncopies = vf / nunits;
3177
3178   gcc_assert (phi_info);
3179   gcc_assert (ncopies >= 1);
3180
3181   /* Convert the step to the desired type.  */
3182   step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3183                                                   step_expr),
3184                                     &stmts, true, NULL_TREE);
3185   if (stmts)
3186     {
3187       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3188       gcc_assert (!new_bb);
3189     }
3190
3191   /* Find the first insertion point in the BB.  */
3192   si = gsi_after_labels (bb);
3193
3194   /* Create the vector that holds the initial_value of the induction.  */
3195   if (nested_in_vect_loop)
3196     {
3197       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3198          been created during vectorization of previous stmts.  We obtain it
3199          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3200       vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
3201       /* If the initial value is not of proper type, convert it.  */
3202       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3203         {
3204           new_stmt = gimple_build_assign_with_ops
3205               (VIEW_CONVERT_EXPR,
3206                vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"),
3207                build1 (VIEW_CONVERT_EXPR, vectype, vec_init), NULL_TREE);
3208           vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3209           gimple_assign_set_lhs (new_stmt, vec_init);
3210           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3211                                                  new_stmt);
3212           gcc_assert (!new_bb);
3213           set_vinfo_for_stmt (new_stmt,
3214                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3215         }
3216     }
3217   else
3218     {
3219       vec<constructor_elt, va_gc> *v;
3220
3221       /* iv_loop is the loop to be vectorized. Create:
3222          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3223       new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3224                                        vect_scalar_var, "var_");
3225       new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3226                                                      init_expr),
3227                                        &stmts, false, new_var);
3228       if (stmts)
3229         {
3230           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3231           gcc_assert (!new_bb);
3232         }
3233
3234       vec_alloc (v, nunits);
3235       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3236       for (i = 1; i < nunits; i++)
3237         {
3238           /* Create: new_name_i = new_name + step_expr  */
3239           init_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_var,
3240                                                     new_name, step_expr);
3241           new_name = make_ssa_name (new_var, init_stmt);
3242           gimple_assign_set_lhs (init_stmt, new_name);
3243
3244           new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3245           gcc_assert (!new_bb);
3246
3247           if (dump_enabled_p ())
3248             {
3249               dump_printf_loc (MSG_NOTE, vect_location,
3250                                "created new init_stmt: ");
3251               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3252             }
3253           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3254         }
3255       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3256       new_vec = build_constructor (vectype, v);
3257       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3258     }
3259
3260
3261   /* Create the vector that holds the step of the induction.  */
3262   if (nested_in_vect_loop)
3263     /* iv_loop is nested in the loop to be vectorized. Generate:
3264        vec_step = [S, S, S, S]  */
3265     new_name = step_expr;
3266   else
3267     {
3268       /* iv_loop is the loop to be vectorized. Generate:
3269           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3270       expr = build_int_cst (TREE_TYPE (step_expr), vf);
3271       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3272                               expr, step_expr);
3273     }
3274
3275   t = unshare_expr (new_name);
3276   gcc_assert (CONSTANT_CLASS_P (new_name));
3277   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3278   gcc_assert (stepvectype);
3279   new_vec = build_vector_from_val (stepvectype, t);
3280   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3281
3282
3283   /* Create the following def-use cycle:
3284      loop prolog:
3285          vec_init = ...
3286          vec_step = ...
3287      loop:
3288          vec_iv = PHI <vec_init, vec_loop>
3289          ...
3290          STMT
3291          ...
3292          vec_loop = vec_iv + vec_step;  */
3293
3294   /* Create the induction-phi that defines the induction-operand.  */
3295   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3296   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3297   set_vinfo_for_stmt (induction_phi,
3298                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3299   induc_def = PHI_RESULT (induction_phi);
3300
3301   /* Create the iv update inside the loop  */
3302   new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3303                                            induc_def, vec_step);
3304   vec_def = make_ssa_name (vec_dest, new_stmt);
3305   gimple_assign_set_lhs (new_stmt, vec_def);
3306   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3307   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3308                                                    NULL));
3309
3310   /* Set the arguments of the phi node:  */
3311   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3312   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3313                UNKNOWN_LOCATION);
3314
3315
3316   /* In case that vectorization factor (VF) is bigger than the number
3317      of elements that we can fit in a vectype (nunits), we have to generate
3318      more than one vector stmt - i.e - we need to "unroll" the
3319      vector stmt by a factor VF/nunits.  For more details see documentation
3320      in vectorizable_operation.  */
3321
3322   if (ncopies > 1)
3323     {
3324       stmt_vec_info prev_stmt_vinfo;
3325       /* FORNOW. This restriction should be relaxed.  */
3326       gcc_assert (!nested_in_vect_loop);
3327
3328       /* Create the vector that holds the step of the induction.  */
3329       expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3330       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3331                               expr, step_expr);
3332       t = unshare_expr (new_name);
3333       gcc_assert (CONSTANT_CLASS_P (new_name));
3334       new_vec = build_vector_from_val (stepvectype, t);
3335       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3336
3337       vec_def = induc_def;
3338       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3339       for (i = 1; i < ncopies; i++)
3340         {
3341           /* vec_i = vec_prev + vec_step  */
3342           new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
3343                                                    vec_def, vec_step);
3344           vec_def = make_ssa_name (vec_dest, new_stmt);
3345           gimple_assign_set_lhs (new_stmt, vec_def);
3346
3347           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3348           if (!useless_type_conversion_p (resvectype, vectype))
3349             {
3350               new_stmt = gimple_build_assign_with_ops
3351                   (VIEW_CONVERT_EXPR,
3352                    vect_get_new_vect_var (resvectype, vect_simple_var,
3353                                           "vec_iv_"),
3354                    build1 (VIEW_CONVERT_EXPR, resvectype,
3355                            gimple_assign_lhs (new_stmt)), NULL_TREE);
3356               gimple_assign_set_lhs (new_stmt,
3357                                      make_ssa_name
3358                                        (gimple_assign_lhs (new_stmt), new_stmt));
3359               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3360             }
3361           set_vinfo_for_stmt (new_stmt,
3362                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3363           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3364           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3365         }
3366     }
3367
3368   if (nested_in_vect_loop)
3369     {
3370       /* Find the loop-closed exit-phi of the induction, and record
3371          the final vector of induction results:  */
3372       exit_phi = NULL;
3373       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3374         {
3375           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
3376             {
3377               exit_phi = USE_STMT (use_p);
3378               break;
3379             }
3380         }
3381       if (exit_phi)
3382         {
3383           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3384           /* FORNOW. Currently not supporting the case that an inner-loop induction
3385              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3386           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3387                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3388
3389           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3390           if (dump_enabled_p ())
3391             {
3392               dump_printf_loc (MSG_NOTE, vect_location,
3393                                "vector of inductions after inner-loop:");
3394               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3395             }
3396         }
3397     }
3398
3399
3400   if (dump_enabled_p ())
3401     {
3402       dump_printf_loc (MSG_NOTE, vect_location,
3403                        "transform induction: created def-use cycle: ");
3404       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3405       dump_printf (MSG_NOTE, "\n");
3406       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3407                         SSA_NAME_DEF_STMT (vec_def), 0);
3408     }
3409
3410   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3411   if (!useless_type_conversion_p (resvectype, vectype))
3412     {
3413       new_stmt = gimple_build_assign_with_ops
3414          (VIEW_CONVERT_EXPR,
3415           vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
3416           build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
3417       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3418       gimple_assign_set_lhs (new_stmt, induc_def);
3419       si = gsi_after_labels (bb);
3420       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3421       set_vinfo_for_stmt (new_stmt,
3422                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3423       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3424         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3425     }
3426
3427   return induc_def;
3428 }
3429
3430
3431 /* Function get_initial_def_for_reduction
3432
3433    Input:
3434    STMT - a stmt that performs a reduction operation in the loop.
3435    INIT_VAL - the initial value of the reduction variable
3436
3437    Output:
3438    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3439         of the reduction (used for adjusting the epilog - see below).
3440    Return a vector variable, initialized according to the operation that STMT
3441         performs. This vector will be used as the initial value of the
3442         vector of partial results.
3443
3444    Option1 (adjust in epilog): Initialize the vector as follows:
3445      add/bit or/xor:    [0,0,...,0,0]
3446      mult/bit and:      [1,1,...,1,1]
3447      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3448    and when necessary (e.g. add/mult case) let the caller know
3449    that it needs to adjust the result by init_val.
3450
3451    Option2: Initialize the vector as follows:
3452      add/bit or/xor:    [init_val,0,0,...,0]
3453      mult/bit and:      [init_val,1,1,...,1]
3454      min/max/cond_expr: [init_val,init_val,...,init_val]
3455    and no adjustments are needed.
3456
3457    For example, for the following code:
3458
3459    s = init_val;
3460    for (i=0;i<n;i++)
3461      s = s + a[i];
3462
3463    STMT is 's = s + a[i]', and the reduction variable is 's'.
3464    For a vector of 4 units, we want to return either [0,0,0,init_val],
3465    or [0,0,0,0] and let the caller know that it needs to adjust
3466    the result at the end by 'init_val'.
3467
3468    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3469    initialization vector is simpler (same element in all entries), if
3470    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3471
3472    A cost model should help decide between these two schemes.  */
3473
3474 tree
3475 get_initial_def_for_reduction (gimple stmt, tree init_val,
3476                                tree *adjustment_def)
3477 {
3478   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3479   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3480   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3481   tree scalar_type = TREE_TYPE (init_val);
3482   tree vectype = get_vectype_for_scalar_type (scalar_type);
3483   int nunits;
3484   enum tree_code code = gimple_assign_rhs_code (stmt);
3485   tree def_for_init;
3486   tree init_def;
3487   tree *elts;
3488   int i;
3489   bool nested_in_vect_loop = false;
3490   tree init_value;
3491   REAL_VALUE_TYPE real_init_val = dconst0;
3492   int int_init_val = 0;
3493   gimple def_stmt = NULL;
3494
3495   gcc_assert (vectype);
3496   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3497
3498   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3499               || SCALAR_FLOAT_TYPE_P (scalar_type));
3500
3501   if (nested_in_vect_loop_p (loop, stmt))
3502     nested_in_vect_loop = true;
3503   else
3504     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3505
3506   /* In case of double reduction we only create a vector variable to be put
3507      in the reduction phi node.  The actual statement creation is done in
3508      vect_create_epilog_for_reduction.  */
3509   if (adjustment_def && nested_in_vect_loop
3510       && TREE_CODE (init_val) == SSA_NAME
3511       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3512       && gimple_code (def_stmt) == GIMPLE_PHI
3513       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3514       && vinfo_for_stmt (def_stmt)
3515       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3516           == vect_double_reduction_def)
3517     {
3518       *adjustment_def = NULL;
3519       return vect_create_destination_var (init_val, vectype);
3520     }
3521
3522   if (TREE_CONSTANT (init_val))
3523     {
3524       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3525         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3526       else
3527         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3528     }
3529   else
3530     init_value = init_val;
3531
3532   switch (code)
3533     {
3534       case WIDEN_SUM_EXPR:
3535       case DOT_PROD_EXPR:
3536       case PLUS_EXPR:
3537       case MINUS_EXPR:
3538       case BIT_IOR_EXPR:
3539       case BIT_XOR_EXPR:
3540       case MULT_EXPR:
3541       case BIT_AND_EXPR:
3542         /* ADJUSMENT_DEF is NULL when called from
3543            vect_create_epilog_for_reduction to vectorize double reduction.  */
3544         if (adjustment_def)
3545           {
3546             if (nested_in_vect_loop)
3547               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3548                                                               NULL);
3549             else
3550               *adjustment_def = init_val;
3551           }
3552
3553         if (code == MULT_EXPR)
3554           {
3555             real_init_val = dconst1;
3556             int_init_val = 1;
3557           }
3558
3559         if (code == BIT_AND_EXPR)
3560           int_init_val = -1;
3561
3562         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3563           def_for_init = build_real (scalar_type, real_init_val);
3564         else
3565           def_for_init = build_int_cst (scalar_type, int_init_val);
3566
3567         /* Create a vector of '0' or '1' except the first element.  */
3568         elts = XALLOCAVEC (tree, nunits);
3569         for (i = nunits - 2; i >= 0; --i)
3570           elts[i + 1] = def_for_init;
3571
3572         /* Option1: the first element is '0' or '1' as well.  */
3573         if (adjustment_def)
3574           {
3575             elts[0] = def_for_init;
3576             init_def = build_vector (vectype, elts);
3577             break;
3578           }
3579
3580         /* Option2: the first element is INIT_VAL.  */
3581         elts[0] = init_val;
3582         if (TREE_CONSTANT (init_val))
3583           init_def = build_vector (vectype, elts);
3584         else
3585           {
3586             vec<constructor_elt, va_gc> *v;
3587             vec_alloc (v, nunits);
3588             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3589             for (i = 1; i < nunits; ++i)
3590               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3591             init_def = build_constructor (vectype, v);
3592           }
3593
3594         break;
3595
3596       case MIN_EXPR:
3597       case MAX_EXPR:
3598       case COND_EXPR:
3599         if (adjustment_def)
3600           {
3601             *adjustment_def = NULL_TREE;
3602             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3603             break;
3604           }
3605
3606         init_def = build_vector_from_val (vectype, init_value);
3607         break;
3608
3609       default:
3610         gcc_unreachable ();
3611     }
3612
3613   return init_def;
3614 }
3615
3616
3617 /* Function vect_create_epilog_for_reduction
3618
3619    Create code at the loop-epilog to finalize the result of a reduction
3620    computation.
3621
3622    VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3623      reduction statements.
3624    STMT is the scalar reduction stmt that is being vectorized.
3625    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3626      number of elements that we can fit in a vectype (nunits).  In this case
3627      we have to generate more than one vector stmt - i.e - we need to "unroll"
3628      the vector stmt by a factor VF/nunits.  For more details see documentation
3629      in vectorizable_operation.
3630    REDUC_CODE is the tree-code for the epilog reduction.
3631    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3632      computation.
3633    REDUC_INDEX is the index of the operand in the right hand side of the
3634      statement that is defined by REDUCTION_PHI.
3635    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3636    SLP_NODE is an SLP node containing a group of reduction statements. The
3637      first one in this group is STMT.
3638
3639    This function:
3640    1. Creates the reduction def-use cycles: sets the arguments for
3641       REDUCTION_PHIS:
3642       The loop-entry argument is the vectorized initial-value of the reduction.
3643       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3644       sums.
3645    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3646       by applying the operation specified by REDUC_CODE if available, or by
3647       other means (whole-vector shifts or a scalar loop).
3648       The function also creates a new phi node at the loop exit to preserve
3649       loop-closed form, as illustrated below.
3650
3651      The flow at the entry to this function:
3652
3653         loop:
3654           vec_def = phi <null, null>            # REDUCTION_PHI
3655           VECT_DEF = vector_stmt                # vectorized form of STMT
3656           s_loop = scalar_stmt                  # (scalar) STMT
3657         loop_exit:
3658           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3659           use <s_out0>
3660           use <s_out0>
3661
3662      The above is transformed by this function into:
3663
3664         loop:
3665           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3666           VECT_DEF = vector_stmt                # vectorized form of STMT
3667           s_loop = scalar_stmt                  # (scalar) STMT
3668         loop_exit:
3669           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
3670           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
3671           v_out2 = reduce <v_out1>
3672           s_out3 = extract_field <v_out2, 0>
3673           s_out4 = adjust_result <s_out3>
3674           use <s_out4>
3675           use <s_out4>
3676 */
3677
3678 static void
3679 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
3680                                   int ncopies, enum tree_code reduc_code,
3681                                   vec<gimple> reduction_phis,
3682                                   int reduc_index, bool double_reduc,
3683                                   slp_tree slp_node)
3684 {
3685   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3686   stmt_vec_info prev_phi_info;
3687   tree vectype;
3688   enum machine_mode mode;
3689   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3690   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3691   basic_block exit_bb;
3692   tree scalar_dest;
3693   tree scalar_type;
3694   gimple new_phi = NULL, phi;
3695   gimple_stmt_iterator exit_gsi;
3696   tree vec_dest;
3697   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3698   gimple epilog_stmt = NULL;
3699   enum tree_code code = gimple_assign_rhs_code (stmt);
3700   gimple exit_phi;
3701   tree bitsize, bitpos;
3702   tree adjustment_def = NULL;
3703   tree vec_initial_def = NULL;
3704   tree reduction_op, expr, def;
3705   tree orig_name, scalar_result;
3706   imm_use_iterator imm_iter, phi_imm_iter;
3707   use_operand_p use_p, phi_use_p;
3708   bool extract_scalar_result = false;
3709   gimple use_stmt, orig_stmt, reduction_phi = NULL;
3710   bool nested_in_vect_loop = false;
3711   vec<gimple> new_phis = vNULL;
3712   vec<gimple> inner_phis = vNULL;
3713   enum vect_def_type dt = vect_unknown_def_type;
3714   int j, i;
3715   vec<tree> scalar_results = vNULL;
3716   unsigned int group_size = 1, k, ratio;
3717   vec<tree> vec_initial_defs = vNULL;
3718   vec<gimple> phis;
3719   bool slp_reduc = false;
3720   tree new_phi_result;
3721   gimple inner_phi = NULL;
3722
3723   if (slp_node)
3724     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
3725
3726   if (nested_in_vect_loop_p (loop, stmt))
3727     {
3728       outer_loop = loop;
3729       loop = loop->inner;
3730       nested_in_vect_loop = true;
3731       gcc_assert (!slp_node);
3732     }
3733
3734   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3735     {
3736     case GIMPLE_SINGLE_RHS:
3737       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3738                   == ternary_op);
3739       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3740       break;
3741     case GIMPLE_UNARY_RHS:
3742       reduction_op = gimple_assign_rhs1 (stmt);
3743       break;
3744     case GIMPLE_BINARY_RHS:
3745       reduction_op = reduc_index ?
3746                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3747       break;
3748     case GIMPLE_TERNARY_RHS:
3749       reduction_op = gimple_op (stmt, reduc_index + 1);
3750       break;
3751     default:
3752       gcc_unreachable ();
3753     }
3754
3755   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3756   gcc_assert (vectype);
3757   mode = TYPE_MODE (vectype);
3758
3759   /* 1. Create the reduction def-use cycle:
3760      Set the arguments of REDUCTION_PHIS, i.e., transform
3761
3762         loop:
3763           vec_def = phi <null, null>            # REDUCTION_PHI
3764           VECT_DEF = vector_stmt                # vectorized form of STMT
3765           ...
3766
3767      into:
3768
3769         loop:
3770           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
3771           VECT_DEF = vector_stmt                # vectorized form of STMT
3772           ...
3773
3774      (in case of SLP, do it for all the phis). */
3775
3776   /* Get the loop-entry arguments.  */
3777   if (slp_node)
3778     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
3779                        NULL, slp_node, reduc_index);
3780   else
3781     {
3782       vec_initial_defs.create (1);
3783      /* For the case of reduction, vect_get_vec_def_for_operand returns
3784         the scalar def before the loop, that defines the initial value
3785         of the reduction variable.  */
3786       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3787                                                       &adjustment_def);
3788       vec_initial_defs.quick_push (vec_initial_def);
3789     }
3790
3791   /* Set phi nodes arguments.  */
3792   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
3793     {
3794       tree vec_init_def, def;
3795       gimple_seq stmts;
3796       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
3797                                            true, NULL_TREE);
3798       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3799       def = vect_defs[i];
3800       for (j = 0; j < ncopies; j++)
3801         {
3802           /* Set the loop-entry arg of the reduction-phi.  */
3803           add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3804                        UNKNOWN_LOCATION);
3805
3806           /* Set the loop-latch arg for the reduction-phi.  */
3807           if (j > 0)
3808             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3809
3810           add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3811
3812           if (dump_enabled_p ())
3813             {
3814               dump_printf_loc (MSG_NOTE, vect_location,
3815                                "transform reduction: created def-use cycle: ");
3816               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
3817               dump_printf (MSG_NOTE, "\n");
3818               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
3819             }
3820
3821           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3822         }
3823     }
3824
3825   vec_initial_defs.release ();
3826
3827   /* 2. Create epilog code.
3828         The reduction epilog code operates across the elements of the vector
3829         of partial results computed by the vectorized loop.
3830         The reduction epilog code consists of:
3831
3832         step 1: compute the scalar result in a vector (v_out2)
3833         step 2: extract the scalar result (s_out3) from the vector (v_out2)
3834         step 3: adjust the scalar result (s_out3) if needed.
3835
3836         Step 1 can be accomplished using one the following three schemes:
3837           (scheme 1) using reduc_code, if available.
3838           (scheme 2) using whole-vector shifts, if available.
3839           (scheme 3) using a scalar loop. In this case steps 1+2 above are
3840                      combined.
3841
3842           The overall epilog code looks like this:
3843
3844           s_out0 = phi <s_loop>         # original EXIT_PHI
3845           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
3846           v_out2 = reduce <v_out1>              # step 1
3847           s_out3 = extract_field <v_out2, 0>    # step 2
3848           s_out4 = adjust_result <s_out3>       # step 3
3849
3850           (step 3 is optional, and steps 1 and 2 may be combined).
3851           Lastly, the uses of s_out0 are replaced by s_out4.  */
3852
3853
3854   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3855          v_out1 = phi <VECT_DEF>
3856          Store them in NEW_PHIS.  */
3857
3858   exit_bb = single_exit (loop)->dest;
3859   prev_phi_info = NULL;
3860   new_phis.create (vect_defs.length ());
3861   FOR_EACH_VEC_ELT (vect_defs, i, def)
3862     {
3863       for (j = 0; j < ncopies; j++)
3864         {
3865           tree new_def = copy_ssa_name (def, NULL);
3866           phi = create_phi_node (new_def, exit_bb);
3867           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3868           if (j == 0)
3869             new_phis.quick_push (phi);
3870           else
3871             {
3872               def = vect_get_vec_def_for_stmt_copy (dt, def);
3873               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3874             }
3875
3876           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3877           prev_phi_info = vinfo_for_stmt (phi);
3878         }
3879     }
3880
3881   /* The epilogue is created for the outer-loop, i.e., for the loop being
3882      vectorized.  Create exit phis for the outer loop.  */
3883   if (double_reduc)
3884     {
3885       loop = outer_loop;
3886       exit_bb = single_exit (loop)->dest;
3887       inner_phis.create (vect_defs.length ());
3888       FOR_EACH_VEC_ELT (new_phis, i, phi)
3889         {
3890           tree new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3891           gimple outer_phi = create_phi_node (new_result, exit_bb);
3892           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3893                            PHI_RESULT (phi));
3894           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3895                                                             loop_vinfo, NULL));
3896           inner_phis.quick_push (phi);
3897           new_phis[i] = outer_phi;
3898           prev_phi_info = vinfo_for_stmt (outer_phi);
3899           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
3900             {
3901               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3902               new_result = copy_ssa_name (PHI_RESULT (phi), NULL);
3903               outer_phi = create_phi_node (new_result, exit_bb);
3904               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
3905                                PHI_RESULT (phi));
3906               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
3907                                                         loop_vinfo, NULL));
3908               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
3909               prev_phi_info = vinfo_for_stmt (outer_phi);
3910             }
3911         }
3912     }
3913
3914   exit_gsi = gsi_after_labels (exit_bb);
3915
3916   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3917          (i.e. when reduc_code is not available) and in the final adjustment
3918          code (if needed).  Also get the original scalar reduction variable as
3919          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
3920          represents a reduction pattern), the tree-code and scalar-def are
3921          taken from the original stmt that the pattern-stmt (STMT) replaces.
3922          Otherwise (it is a regular reduction) - the tree-code and scalar-def
3923          are taken from STMT.  */
3924
3925   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3926   if (!orig_stmt)
3927     {
3928       /* Regular reduction  */
3929       orig_stmt = stmt;
3930     }
3931   else
3932     {
3933       /* Reduction pattern  */
3934       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3935       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3936       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3937     }
3938
3939   code = gimple_assign_rhs_code (orig_stmt);
3940   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3941      partial results are added and not subtracted.  */
3942   if (code == MINUS_EXPR)
3943     code = PLUS_EXPR;
3944
3945   scalar_dest = gimple_assign_lhs (orig_stmt);
3946   scalar_type = TREE_TYPE (scalar_dest);
3947   scalar_results.create (group_size);
3948   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3949   bitsize = TYPE_SIZE (scalar_type);
3950
3951   /* In case this is a reduction in an inner-loop while vectorizing an outer
3952      loop - we don't need to extract a single scalar result at the end of the
3953      inner-loop (unless it is double reduction, i.e., the use of reduction is
3954      outside the outer-loop).  The final vector of partial results will be used
3955      in the vectorized outer-loop, or reduced to a scalar result at the end of
3956      the outer-loop.  */
3957   if (nested_in_vect_loop && !double_reduc)
3958     goto vect_finalize_reduction;
3959
3960   /* SLP reduction without reduction chain, e.g.,
3961      # a1 = phi <a2, a0>
3962      # b1 = phi <b2, b0>
3963      a2 = operation (a1)
3964      b2 = operation (b1)  */
3965   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
3966
3967   /* In case of reduction chain, e.g.,
3968      # a1 = phi <a3, a0>
3969      a2 = operation (a1)
3970      a3 = operation (a2),
3971
3972      we may end up with more than one vector result.  Here we reduce them to
3973      one vector.  */
3974   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
3975     {
3976       tree first_vect = PHI_RESULT (new_phis[0]);
3977       tree tmp;
3978       gimple new_vec_stmt = NULL;
3979
3980       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3981       for (k = 1; k < new_phis.length (); k++)
3982         {
3983           gimple next_phi = new_phis[k];
3984           tree second_vect = PHI_RESULT (next_phi);
3985
3986           tmp = build2 (code, vectype,  first_vect, second_vect);
3987           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
3988           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
3989           gimple_assign_set_lhs (new_vec_stmt, first_vect);
3990           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
3991         }
3992
3993       new_phi_result = first_vect;
3994       if (new_vec_stmt)
3995         {
3996           new_phis.truncate (0);
3997           new_phis.safe_push (new_vec_stmt);
3998         }
3999     }
4000   else
4001     new_phi_result = PHI_RESULT (new_phis[0]);
4002
4003   /* 2.3 Create the reduction code, using one of the three schemes described
4004          above. In SLP we simply need to extract all the elements from the
4005          vector (without reducing them), so we use scalar shifts.  */
4006   if (reduc_code != ERROR_MARK && !slp_reduc)
4007     {
4008       tree tmp;
4009
4010       /*** Case 1:  Create:
4011            v_out2 = reduc_expr <v_out1>  */
4012
4013       if (dump_enabled_p ())
4014         dump_printf_loc (MSG_NOTE, vect_location,
4015                          "Reduce using direct vector reduction.");
4016
4017       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4018       tmp = build1 (reduc_code, vectype, new_phi_result);
4019       epilog_stmt = gimple_build_assign (vec_dest, tmp);
4020       new_temp = make_ssa_name (vec_dest, epilog_stmt);
4021       gimple_assign_set_lhs (epilog_stmt, new_temp);
4022       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4023
4024       extract_scalar_result = true;
4025     }
4026   else
4027     {
4028       enum tree_code shift_code = ERROR_MARK;
4029       bool have_whole_vector_shift = true;
4030       int bit_offset;
4031       int element_bitsize = tree_low_cst (bitsize, 1);
4032       int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4033       tree vec_temp;
4034
4035       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
4036         shift_code = VEC_RSHIFT_EXPR;
4037       else
4038         have_whole_vector_shift = false;
4039
4040       /* Regardless of whether we have a whole vector shift, if we're
4041          emulating the operation via tree-vect-generic, we don't want
4042          to use it.  Only the first round of the reduction is likely
4043          to still be profitable via emulation.  */
4044       /* ??? It might be better to emit a reduction tree code here, so that
4045          tree-vect-generic can expand the first round via bit tricks.  */
4046       if (!VECTOR_MODE_P (mode))
4047         have_whole_vector_shift = false;
4048       else
4049         {
4050           optab optab = optab_for_tree_code (code, vectype, optab_default);
4051           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4052             have_whole_vector_shift = false;
4053         }
4054
4055       if (have_whole_vector_shift && !slp_reduc)
4056         {
4057           /*** Case 2: Create:
4058              for (offset = VS/2; offset >= element_size; offset/=2)
4059                 {
4060                   Create:  va' = vec_shift <va, offset>
4061                   Create:  va = vop <va, va'>
4062                 }  */
4063
4064           if (dump_enabled_p ())
4065             dump_printf_loc (MSG_NOTE, vect_location,
4066                              "Reduce using vector shifts");
4067
4068           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4069           new_temp = new_phi_result;
4070           for (bit_offset = vec_size_in_bits/2;
4071                bit_offset >= element_bitsize;
4072                bit_offset /= 2)
4073             {
4074               tree bitpos = size_int (bit_offset);
4075
4076               epilog_stmt = gimple_build_assign_with_ops (shift_code,
4077                                                vec_dest, new_temp, bitpos);
4078               new_name = make_ssa_name (vec_dest, epilog_stmt);
4079               gimple_assign_set_lhs (epilog_stmt, new_name);
4080               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4081
4082               epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
4083                                                           new_name, new_temp);
4084               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4085               gimple_assign_set_lhs (epilog_stmt, new_temp);
4086               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4087             }
4088
4089           extract_scalar_result = true;
4090         }
4091       else
4092         {
4093           tree rhs;
4094
4095           /*** Case 3: Create:
4096              s = extract_field <v_out2, 0>
4097              for (offset = element_size;
4098                   offset < vector_size;
4099                   offset += element_size;)
4100                {
4101                  Create:  s' = extract_field <v_out2, offset>
4102                  Create:  s = op <s, s'>  // For non SLP cases
4103                }  */
4104
4105           if (dump_enabled_p ())
4106             dump_printf_loc (MSG_NOTE, vect_location,
4107                              "Reduce using scalar code. ");
4108
4109           vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
4110           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4111             {
4112               if (gimple_code (new_phi) == GIMPLE_PHI)
4113                 vec_temp = PHI_RESULT (new_phi);
4114               else
4115                 vec_temp = gimple_assign_lhs (new_phi);
4116               rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4117                             bitsize_zero_node);
4118               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4119               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4120               gimple_assign_set_lhs (epilog_stmt, new_temp);
4121               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4122
4123               /* In SLP we don't need to apply reduction operation, so we just
4124                  collect s' values in SCALAR_RESULTS.  */
4125               if (slp_reduc)
4126                 scalar_results.safe_push (new_temp);
4127
4128               for (bit_offset = element_bitsize;
4129                    bit_offset < vec_size_in_bits;
4130                    bit_offset += element_bitsize)
4131                 {
4132                   tree bitpos = bitsize_int (bit_offset);
4133                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4134                                      bitsize, bitpos);
4135
4136                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4137                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4138                   gimple_assign_set_lhs (epilog_stmt, new_name);
4139                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4140
4141                   if (slp_reduc)
4142                     {
4143                       /* In SLP we don't need to apply reduction operation, so
4144                          we just collect s' values in SCALAR_RESULTS.  */
4145                       new_temp = new_name;
4146                       scalar_results.safe_push (new_name);
4147                     }
4148                   else
4149                     {
4150                       epilog_stmt = gimple_build_assign_with_ops (code,
4151                                           new_scalar_dest, new_name, new_temp);
4152                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4153                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4154                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4155                     }
4156                 }
4157             }
4158
4159           /* The only case where we need to reduce scalar results in SLP, is
4160              unrolling.  If the size of SCALAR_RESULTS is greater than
4161              GROUP_SIZE, we reduce them combining elements modulo
4162              GROUP_SIZE.  */
4163           if (slp_reduc)
4164             {
4165               tree res, first_res, new_res;
4166               gimple new_stmt;
4167
4168               /* Reduce multiple scalar results in case of SLP unrolling.  */
4169               for (j = group_size; scalar_results.iterate (j, &res);
4170                    j++)
4171                 {
4172                   first_res = scalar_results[j % group_size];
4173                   new_stmt = gimple_build_assign_with_ops (code,
4174                                               new_scalar_dest, first_res, res);
4175                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4176                   gimple_assign_set_lhs (new_stmt, new_res);
4177                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4178                   scalar_results[j % group_size] = new_res;
4179                 }
4180             }
4181           else
4182             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4183             scalar_results.safe_push (new_temp);
4184
4185           extract_scalar_result = false;
4186         }
4187     }
4188
4189   /* 2.4  Extract the final scalar result.  Create:
4190           s_out3 = extract_field <v_out2, bitpos>  */
4191
4192   if (extract_scalar_result)
4193     {
4194       tree rhs;
4195
4196       if (dump_enabled_p ())
4197         dump_printf_loc (MSG_NOTE, vect_location,
4198                          "extract scalar result");
4199
4200       if (BYTES_BIG_ENDIAN)
4201         bitpos = size_binop (MULT_EXPR,
4202                              bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
4203                              TYPE_SIZE (scalar_type));
4204       else
4205         bitpos = bitsize_zero_node;
4206
4207       rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
4208       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4209       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4210       gimple_assign_set_lhs (epilog_stmt, new_temp);
4211       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4212       scalar_results.safe_push (new_temp);
4213     }
4214
4215 vect_finalize_reduction:
4216
4217   if (double_reduc)
4218     loop = loop->inner;
4219
4220   /* 2.5 Adjust the final result by the initial value of the reduction
4221          variable. (When such adjustment is not needed, then
4222          'adjustment_def' is zero).  For example, if code is PLUS we create:
4223          new_temp = loop_exit_def + adjustment_def  */
4224
4225   if (adjustment_def)
4226     {
4227       gcc_assert (!slp_reduc);
4228       if (nested_in_vect_loop)
4229         {
4230           new_phi = new_phis[0];
4231           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4232           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4233           new_dest = vect_create_destination_var (scalar_dest, vectype);
4234         }
4235       else
4236         {
4237           new_temp = scalar_results[0];
4238           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4239           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4240           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4241         }
4242
4243       epilog_stmt = gimple_build_assign (new_dest, expr);
4244       new_temp = make_ssa_name (new_dest, epilog_stmt);
4245       gimple_assign_set_lhs (epilog_stmt, new_temp);
4246       SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
4247       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4248       if (nested_in_vect_loop)
4249         {
4250           set_vinfo_for_stmt (epilog_stmt,
4251                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4252                                                  NULL));
4253           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4254                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4255
4256           if (!double_reduc)
4257             scalar_results.quick_push (new_temp);
4258           else
4259             scalar_results[0] = new_temp;
4260         }
4261       else
4262         scalar_results[0] = new_temp;
4263
4264       new_phis[0] = epilog_stmt;
4265     }
4266
4267   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4268           phis with new adjusted scalar results, i.e., replace use <s_out0>
4269           with use <s_out4>.
4270
4271      Transform:
4272         loop_exit:
4273           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4274           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4275           v_out2 = reduce <v_out1>
4276           s_out3 = extract_field <v_out2, 0>
4277           s_out4 = adjust_result <s_out3>
4278           use <s_out0>
4279           use <s_out0>
4280
4281      into:
4282
4283         loop_exit:
4284           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4285           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4286           v_out2 = reduce <v_out1>
4287           s_out3 = extract_field <v_out2, 0>
4288           s_out4 = adjust_result <s_out3>
4289           use <s_out4>
4290           use <s_out4> */
4291
4292
4293   /* In SLP reduction chain we reduce vector results into one vector if
4294      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4295      the last stmt in the reduction chain, since we are looking for the loop
4296      exit phi node.  */
4297   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4298     {
4299       scalar_dest = gimple_assign_lhs (
4300                         SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
4301       group_size = 1;
4302     }
4303
4304   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4305      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4306      need to match SCALAR_RESULTS with corresponding statements.  The first
4307      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4308      the first vector stmt, etc.
4309      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4310   if (group_size > new_phis.length ())
4311     {
4312       ratio = group_size / new_phis.length ();
4313       gcc_assert (!(group_size % new_phis.length ()));
4314     }
4315   else
4316     ratio = 1;
4317
4318   for (k = 0; k < group_size; k++)
4319     {
4320       if (k % ratio == 0)
4321         {
4322           epilog_stmt = new_phis[k / ratio];
4323           reduction_phi = reduction_phis[k / ratio];
4324           if (double_reduc)
4325             inner_phi = inner_phis[k / ratio];
4326         }
4327
4328       if (slp_reduc)
4329         {
4330           gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4331
4332           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4333           /* SLP statements can't participate in patterns.  */
4334           gcc_assert (!orig_stmt);
4335           scalar_dest = gimple_assign_lhs (current_stmt);
4336         }
4337
4338       phis.create (3);
4339       /* Find the loop-closed-use at the loop exit of the original scalar
4340          result.  (The reduction result is expected to have two immediate uses -
4341          one at the latch block, and one at the loop exit).  */
4342       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4343         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4344             && !is_gimple_debug (USE_STMT (use_p)))
4345           phis.safe_push (USE_STMT (use_p));
4346
4347       /* While we expect to have found an exit_phi because of loop-closed-ssa
4348          form we can end up without one if the scalar cycle is dead.  */
4349
4350       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4351         {
4352           if (outer_loop)
4353             {
4354               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4355               gimple vect_phi;
4356
4357               /* FORNOW. Currently not supporting the case that an inner-loop
4358                  reduction is not used in the outer-loop (but only outside the
4359                  outer-loop), unless it is double reduction.  */
4360               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4361                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4362                           || double_reduc);
4363
4364               STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4365               if (!double_reduc
4366                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4367                       != vect_double_reduction_def)
4368                 continue;
4369
4370               /* Handle double reduction:
4371
4372                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4373                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4374                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4375                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4376
4377                  At that point the regular reduction (stmt2 and stmt3) is
4378                  already vectorized, as well as the exit phi node, stmt4.
4379                  Here we vectorize the phi node of double reduction, stmt1, and
4380                  update all relevant statements.  */
4381
4382               /* Go through all the uses of s2 to find double reduction phi
4383                  node, i.e., stmt1 above.  */
4384               orig_name = PHI_RESULT (exit_phi);
4385               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4386                 {
4387                   stmt_vec_info use_stmt_vinfo;
4388                   stmt_vec_info new_phi_vinfo;
4389                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4390                   basic_block bb = gimple_bb (use_stmt);
4391                   gimple use;
4392
4393                   /* Check that USE_STMT is really double reduction phi
4394                      node.  */
4395                   if (gimple_code (use_stmt) != GIMPLE_PHI
4396                       || gimple_phi_num_args (use_stmt) != 2
4397                       || bb->loop_father != outer_loop)
4398                     continue;
4399                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4400                   if (!use_stmt_vinfo
4401                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4402                           != vect_double_reduction_def)
4403                     continue;
4404
4405                   /* Create vector phi node for double reduction:
4406                      vs1 = phi <vs0, vs2>
4407                      vs1 was created previously in this function by a call to
4408                        vect_get_vec_def_for_operand and is stored in
4409                        vec_initial_def;
4410                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4411                      vs0 is created here.  */
4412
4413                   /* Create vector phi node.  */
4414                   vect_phi = create_phi_node (vec_initial_def, bb);
4415                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4416                                     loop_vec_info_for_loop (outer_loop), NULL);
4417                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4418
4419                   /* Create vs0 - initial def of the double reduction phi.  */
4420                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4421                                              loop_preheader_edge (outer_loop));
4422                   init_def = get_initial_def_for_reduction (stmt,
4423                                                           preheader_arg, NULL);
4424                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4425                                                     vectype, NULL);
4426
4427                   /* Update phi node arguments with vs0 and vs2.  */
4428                   add_phi_arg (vect_phi, vect_phi_init,
4429                                loop_preheader_edge (outer_loop),
4430                                UNKNOWN_LOCATION);
4431                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4432                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4433                   if (dump_enabled_p ())
4434                     {
4435                       dump_printf_loc (MSG_NOTE, vect_location,
4436                                        "created double reduction phi node: ");
4437                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4438                     }
4439
4440                   vect_phi_res = PHI_RESULT (vect_phi);
4441
4442                   /* Replace the use, i.e., set the correct vs1 in the regular
4443                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4444                      loop is redundant.  */
4445                   use = reduction_phi;
4446                   for (j = 0; j < ncopies; j++)
4447                     {
4448                       edge pr_edge = loop_preheader_edge (loop);
4449                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4450                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4451                     }
4452                 }
4453             }
4454         }
4455
4456       phis.release ();
4457       if (nested_in_vect_loop)
4458         {
4459           if (double_reduc)
4460             loop = outer_loop;
4461           else
4462             continue;
4463         }
4464
4465       phis.create (3);
4466       /* Find the loop-closed-use at the loop exit of the original scalar
4467          result.  (The reduction result is expected to have two immediate uses,
4468          one at the latch block, and one at the loop exit).  For double
4469          reductions we are looking for exit phis of the outer loop.  */
4470       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4471         {
4472           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4473             {
4474               if (!is_gimple_debug (USE_STMT (use_p)))
4475                 phis.safe_push (USE_STMT (use_p));
4476             }
4477           else
4478             {
4479               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4480                 {
4481                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4482
4483                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4484                     {
4485                       if (!flow_bb_inside_loop_p (loop,
4486                                              gimple_bb (USE_STMT (phi_use_p)))
4487                           && !is_gimple_debug (USE_STMT (phi_use_p)))
4488                         phis.safe_push (USE_STMT (phi_use_p));
4489                     }
4490                 }
4491             }
4492         }
4493
4494       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4495         {
4496           /* Replace the uses:  */
4497           orig_name = PHI_RESULT (exit_phi);
4498           scalar_result = scalar_results[k];
4499           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4500             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4501               SET_USE (use_p, scalar_result);
4502         }
4503
4504       phis.release ();
4505     }
4506
4507   scalar_results.release ();
4508   inner_phis.release ();
4509   new_phis.release ();
4510 }
4511
4512
4513 /* Function vectorizable_reduction.
4514
4515    Check if STMT performs a reduction operation that can be vectorized.
4516    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4517    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4518    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4519
4520    This function also handles reduction idioms (patterns) that have been
4521    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4522    of this form:
4523      X = pattern_expr (arg0, arg1, ..., X)
4524    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4525    sequence that had been detected and replaced by the pattern-stmt (STMT).
4526
4527    In some cases of reduction patterns, the type of the reduction variable X is
4528    different than the type of the other arguments of STMT.
4529    In such cases, the vectype that is used when transforming STMT into a vector
4530    stmt is different than the vectype that is used to determine the
4531    vectorization factor, because it consists of a different number of elements
4532    than the actual number of elements that are being operated upon in parallel.
4533
4534    For example, consider an accumulation of shorts into an int accumulator.
4535    On some targets it's possible to vectorize this pattern operating on 8
4536    shorts at a time (hence, the vectype for purposes of determining the
4537    vectorization factor should be V8HI); on the other hand, the vectype that
4538    is used to create the vector form is actually V4SI (the type of the result).
4539
4540    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4541    indicates what is the actual level of parallelism (V8HI in the example), so
4542    that the right vectorization factor would be derived.  This vectype
4543    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4544    be used to create the vectorized stmt.  The right vectype for the vectorized
4545    stmt is obtained from the type of the result X:
4546         get_vectype_for_scalar_type (TREE_TYPE (X))
4547
4548    This means that, contrary to "regular" reductions (or "regular" stmts in
4549    general), the following equation:
4550       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4551    does *NOT* necessarily hold for reduction patterns.  */
4552
4553 bool
4554 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4555                         gimple *vec_stmt, slp_tree slp_node)
4556 {
4557   tree vec_dest;
4558   tree scalar_dest;
4559   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4560   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4561   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4562   tree vectype_in = NULL_TREE;
4563   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4564   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4565   enum tree_code code, orig_code, epilog_reduc_code;
4566   enum machine_mode vec_mode;
4567   int op_type;
4568   optab optab, reduc_optab;
4569   tree new_temp = NULL_TREE;
4570   tree def;
4571   gimple def_stmt;
4572   enum vect_def_type dt;
4573   gimple new_phi = NULL;
4574   tree scalar_type;
4575   bool is_simple_use;
4576   gimple orig_stmt;
4577   stmt_vec_info orig_stmt_info;
4578   tree expr = NULL_TREE;
4579   int i;
4580   int ncopies;
4581   int epilog_copies;
4582   stmt_vec_info prev_stmt_info, prev_phi_info;
4583   bool single_defuse_cycle = false;
4584   tree reduc_def = NULL_TREE;
4585   gimple new_stmt = NULL;
4586   int j;
4587   tree ops[3];
4588   bool nested_cycle = false, found_nested_cycle_def = false;
4589   gimple reduc_def_stmt = NULL;
4590   /* The default is that the reduction variable is the last in statement.  */
4591   int reduc_index = 2;
4592   bool double_reduc = false, dummy;
4593   basic_block def_bb;
4594   struct loop * def_stmt_loop, *outer_loop = NULL;
4595   tree def_arg;
4596   gimple def_arg_stmt;
4597   vec<tree> vec_oprnds0 = vNULL;
4598   vec<tree> vec_oprnds1 = vNULL;
4599   vec<tree> vect_defs = vNULL;
4600   vec<gimple> phis = vNULL;
4601   int vec_num;
4602   tree def0, def1, tem, op0, op1 = NULL_TREE;
4603
4604   /* In case of reduction chain we switch to the first stmt in the chain, but
4605      we don't update STMT_INFO, since only the last stmt is marked as reduction
4606      and has reduction properties.  */
4607   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4608     stmt = GROUP_FIRST_ELEMENT (stmt_info);
4609
4610   if (nested_in_vect_loop_p (loop, stmt))
4611     {
4612       outer_loop = loop;
4613       loop = loop->inner;
4614       nested_cycle = true;
4615     }
4616
4617   /* 1. Is vectorizable reduction?  */
4618   /* Not supportable if the reduction variable is used in the loop, unless
4619      it's a reduction chain.  */
4620   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4621       && !GROUP_FIRST_ELEMENT (stmt_info))
4622     return false;
4623
4624   /* Reductions that are not used even in an enclosing outer-loop,
4625      are expected to be "live" (used out of the loop).  */
4626   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4627       && !STMT_VINFO_LIVE_P (stmt_info))
4628     return false;
4629
4630   /* Make sure it was already recognized as a reduction computation.  */
4631   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4632       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4633     return false;
4634
4635   /* 2. Has this been recognized as a reduction pattern?
4636
4637      Check if STMT represents a pattern that has been recognized
4638      in earlier analysis stages.  For stmts that represent a pattern,
4639      the STMT_VINFO_RELATED_STMT field records the last stmt in
4640      the original sequence that constitutes the pattern.  */
4641
4642   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4643   if (orig_stmt)
4644     {
4645       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4646       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4647       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4648     }
4649
4650   /* 3. Check the operands of the operation.  The first operands are defined
4651         inside the loop body. The last operand is the reduction variable,
4652         which is defined by the loop-header-phi.  */
4653
4654   gcc_assert (is_gimple_assign (stmt));
4655
4656   /* Flatten RHS.  */
4657   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4658     {
4659     case GIMPLE_SINGLE_RHS:
4660       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4661       if (op_type == ternary_op)
4662         {
4663           tree rhs = gimple_assign_rhs1 (stmt);
4664           ops[0] = TREE_OPERAND (rhs, 0);
4665           ops[1] = TREE_OPERAND (rhs, 1);
4666           ops[2] = TREE_OPERAND (rhs, 2);
4667           code = TREE_CODE (rhs);
4668         }
4669       else
4670         return false;
4671       break;
4672
4673     case GIMPLE_BINARY_RHS:
4674       code = gimple_assign_rhs_code (stmt);
4675       op_type = TREE_CODE_LENGTH (code);
4676       gcc_assert (op_type == binary_op);
4677       ops[0] = gimple_assign_rhs1 (stmt);
4678       ops[1] = gimple_assign_rhs2 (stmt);
4679       break;
4680
4681     case GIMPLE_TERNARY_RHS:
4682       code = gimple_assign_rhs_code (stmt);
4683       op_type = TREE_CODE_LENGTH (code);
4684       gcc_assert (op_type == ternary_op);
4685       ops[0] = gimple_assign_rhs1 (stmt);
4686       ops[1] = gimple_assign_rhs2 (stmt);
4687       ops[2] = gimple_assign_rhs3 (stmt);
4688       break;
4689
4690     case GIMPLE_UNARY_RHS:
4691       return false;
4692
4693     default:
4694       gcc_unreachable ();
4695     }
4696
4697   if (code == COND_EXPR && slp_node)
4698     return false;
4699
4700   scalar_dest = gimple_assign_lhs (stmt);
4701   scalar_type = TREE_TYPE (scalar_dest);
4702   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4703       && !SCALAR_FLOAT_TYPE_P (scalar_type))
4704     return false;
4705
4706   /* Do not try to vectorize bit-precision reductions.  */
4707   if ((TYPE_PRECISION (scalar_type)
4708        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4709     return false;
4710
4711   /* All uses but the last are expected to be defined in the loop.
4712      The last use is the reduction variable.  In case of nested cycle this
4713      assumption is not true: we use reduc_index to record the index of the
4714      reduction variable.  */
4715   for (i = 0; i < op_type - 1; i++)
4716     {
4717       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
4718       if (i == 0 && code == COND_EXPR)
4719         continue;
4720
4721       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4722                                             &def_stmt, &def, &dt, &tem);
4723       if (!vectype_in)
4724         vectype_in = tem;
4725       gcc_assert (is_simple_use);
4726
4727       if (dt != vect_internal_def
4728           && dt != vect_external_def
4729           && dt != vect_constant_def
4730           && dt != vect_induction_def
4731           && !(dt == vect_nested_cycle && nested_cycle))
4732         return false;
4733
4734       if (dt == vect_nested_cycle)
4735         {
4736           found_nested_cycle_def = true;
4737           reduc_def_stmt = def_stmt;
4738           reduc_index = i;
4739         }
4740     }
4741
4742   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4743                                         &def_stmt, &def, &dt, &tem);
4744   if (!vectype_in)
4745     vectype_in = tem;
4746   gcc_assert (is_simple_use);
4747   if (!(dt == vect_reduction_def
4748         || dt == vect_nested_cycle
4749         || ((dt == vect_internal_def || dt == vect_external_def
4750              || dt == vect_constant_def || dt == vect_induction_def)
4751             && nested_cycle && found_nested_cycle_def)))
4752     {
4753       /* For pattern recognized stmts, orig_stmt might be a reduction,
4754          but some helper statements for the pattern might not, or
4755          might be COND_EXPRs with reduction uses in the condition.  */
4756       gcc_assert (orig_stmt);
4757       return false;
4758     }
4759   if (!found_nested_cycle_def)
4760     reduc_def_stmt = def_stmt;
4761
4762   gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4763   if (orig_stmt)
4764     gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4765                                                        reduc_def_stmt,
4766                                                        !nested_cycle,
4767                                                        &dummy));
4768   else
4769     {
4770       gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4771                                              !nested_cycle, &dummy);
4772       /* We changed STMT to be the first stmt in reduction chain, hence we
4773          check that in this case the first element in the chain is STMT.  */
4774       gcc_assert (stmt == tmp
4775                   || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4776     }
4777
4778   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4779     return false;
4780
4781   if (slp_node || PURE_SLP_STMT (stmt_info))
4782     ncopies = 1;
4783   else
4784     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4785                / TYPE_VECTOR_SUBPARTS (vectype_in));
4786
4787   gcc_assert (ncopies >= 1);
4788
4789   vec_mode = TYPE_MODE (vectype_in);
4790
4791   if (code == COND_EXPR)
4792     {
4793       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4794         {
4795           if (dump_enabled_p ())
4796             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797                              "unsupported condition in reduction");
4798
4799             return false;
4800         }
4801     }
4802   else
4803     {
4804       /* 4. Supportable by target?  */
4805
4806       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
4807           || code == LROTATE_EXPR || code == RROTATE_EXPR)
4808         {
4809           /* Shifts and rotates are only supported by vectorizable_shifts,
4810              not vectorizable_reduction.  */
4811           if (dump_enabled_p ())
4812             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4813                              "unsupported shift or rotation.");
4814           return false;
4815         }
4816
4817       /* 4.1. check support for the operation in the loop  */
4818       optab = optab_for_tree_code (code, vectype_in, optab_default);
4819       if (!optab)
4820         {
4821           if (dump_enabled_p ())
4822             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4823                              "no optab.");
4824
4825           return false;
4826         }
4827
4828       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4829         {
4830           if (dump_enabled_p ())
4831             dump_printf (MSG_NOTE, "op not supported by target.");
4832
4833           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4834               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4835                   < vect_min_worthwhile_factor (code))
4836             return false;
4837
4838           if (dump_enabled_p ())
4839             dump_printf (MSG_NOTE, "proceeding using word mode.");
4840         }
4841
4842       /* Worthwhile without SIMD support?  */
4843       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4844           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4845              < vect_min_worthwhile_factor (code))
4846         {
4847           if (dump_enabled_p ())
4848             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4849                              "not worthwhile without SIMD support.");
4850
4851           return false;
4852         }
4853     }
4854
4855   /* 4.2. Check support for the epilog operation.
4856
4857           If STMT represents a reduction pattern, then the type of the
4858           reduction variable may be different than the type of the rest
4859           of the arguments.  For example, consider the case of accumulation
4860           of shorts into an int accumulator; The original code:
4861                         S1: int_a = (int) short_a;
4862           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
4863
4864           was replaced with:
4865                         STMT: int_acc = widen_sum <short_a, int_acc>
4866
4867           This means that:
4868           1. The tree-code that is used to create the vector operation in the
4869              epilog code (that reduces the partial results) is not the
4870              tree-code of STMT, but is rather the tree-code of the original
4871              stmt from the pattern that STMT is replacing.  I.e, in the example
4872              above we want to use 'widen_sum' in the loop, but 'plus' in the
4873              epilog.
4874           2. The type (mode) we use to check available target support
4875              for the vector operation to be created in the *epilog*, is
4876              determined by the type of the reduction variable (in the example
4877              above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4878              However the type (mode) we use to check available target support
4879              for the vector operation to be created *inside the loop*, is
4880              determined by the type of the other arguments to STMT (in the
4881              example we'd check this: optab_handler (widen_sum_optab,
4882              vect_short_mode)).
4883
4884           This is contrary to "regular" reductions, in which the types of all
4885           the arguments are the same as the type of the reduction variable.
4886           For "regular" reductions we can therefore use the same vector type
4887           (and also the same tree-code) when generating the epilog code and
4888           when generating the code inside the loop.  */
4889
4890   if (orig_stmt)
4891     {
4892       /* This is a reduction pattern: get the vectype from the type of the
4893          reduction variable, and get the tree-code from orig_stmt.  */
4894       orig_code = gimple_assign_rhs_code (orig_stmt);
4895       gcc_assert (vectype_out);
4896       vec_mode = TYPE_MODE (vectype_out);
4897     }
4898   else
4899     {
4900       /* Regular reduction: use the same vectype and tree-code as used for
4901          the vector code inside the loop can be used for the epilog code. */
4902       orig_code = code;
4903     }
4904
4905   if (nested_cycle)
4906     {
4907       def_bb = gimple_bb (reduc_def_stmt);
4908       def_stmt_loop = def_bb->loop_father;
4909       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4910                                        loop_preheader_edge (def_stmt_loop));
4911       if (TREE_CODE (def_arg) == SSA_NAME
4912           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4913           && gimple_code (def_arg_stmt) == GIMPLE_PHI
4914           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4915           && vinfo_for_stmt (def_arg_stmt)
4916           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4917               == vect_double_reduction_def)
4918         double_reduc = true;
4919     }
4920
4921   epilog_reduc_code = ERROR_MARK;
4922   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4923     {
4924       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4925                                          optab_default);
4926       if (!reduc_optab)
4927         {
4928           if (dump_enabled_p ())
4929             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4930                              "no optab for reduction.");
4931
4932           epilog_reduc_code = ERROR_MARK;
4933         }
4934
4935       if (reduc_optab
4936           && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4937         {
4938           if (dump_enabled_p ())
4939             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4940                              "reduc op not supported by target.");
4941
4942           epilog_reduc_code = ERROR_MARK;
4943         }
4944     }
4945   else
4946     {
4947       if (!nested_cycle || double_reduc)
4948         {
4949           if (dump_enabled_p ())
4950             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4951                              "no reduc code for scalar code.");
4952
4953           return false;
4954         }
4955     }
4956
4957   if (double_reduc && ncopies > 1)
4958     {
4959       if (dump_enabled_p ())
4960         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4961                          "multiple types in double reduction");
4962
4963       return false;
4964     }
4965
4966   /* In case of widenning multiplication by a constant, we update the type
4967      of the constant to be the type of the other operand.  We check that the
4968      constant fits the type in the pattern recognition pass.  */
4969   if (code == DOT_PROD_EXPR
4970       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
4971     {
4972       if (TREE_CODE (ops[0]) == INTEGER_CST)
4973         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
4974       else if (TREE_CODE (ops[1]) == INTEGER_CST)
4975         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4976       else
4977         {
4978           if (dump_enabled_p ())
4979             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4980                              "invalid types in dot-prod");
4981
4982           return false;
4983         }
4984     }
4985
4986   if (!vec_stmt) /* transformation not required.  */
4987     {
4988       if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4989         return false;
4990       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4991       return true;
4992     }
4993
4994   /** Transform.  **/
4995
4996   if (dump_enabled_p ())
4997     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.");
4998
4999   /* FORNOW: Multiple types are not supported for condition.  */
5000   if (code == COND_EXPR)
5001     gcc_assert (ncopies == 1);
5002
5003   /* Create the destination vector  */
5004   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5005
5006   /* In case the vectorization factor (VF) is bigger than the number
5007      of elements that we can fit in a vectype (nunits), we have to generate
5008      more than one vector stmt - i.e - we need to "unroll" the
5009      vector stmt by a factor VF/nunits.  For more details see documentation
5010      in vectorizable_operation.  */
5011
5012   /* If the reduction is used in an outer loop we need to generate
5013      VF intermediate results, like so (e.g. for ncopies=2):
5014         r0 = phi (init, r0)
5015         r1 = phi (init, r1)
5016         r0 = x0 + r0;
5017         r1 = x1 + r1;
5018     (i.e. we generate VF results in 2 registers).
5019     In this case we have a separate def-use cycle for each copy, and therefore
5020     for each copy we get the vector def for the reduction variable from the
5021     respective phi node created for this copy.
5022
5023     Otherwise (the reduction is unused in the loop nest), we can combine
5024     together intermediate results, like so (e.g. for ncopies=2):
5025         r = phi (init, r)
5026         r = x0 + r;
5027         r = x1 + r;
5028    (i.e. we generate VF/2 results in a single register).
5029    In this case for each copy we get the vector def for the reduction variable
5030    from the vectorized reduction operation generated in the previous iteration.
5031   */
5032
5033   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5034     {
5035       single_defuse_cycle = true;
5036       epilog_copies = 1;
5037     }
5038   else
5039     epilog_copies = ncopies;
5040
5041   prev_stmt_info = NULL;
5042   prev_phi_info = NULL;
5043   if (slp_node)
5044     {
5045       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5046       gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5047                   == TYPE_VECTOR_SUBPARTS (vectype_in));
5048     }
5049   else
5050     {
5051       vec_num = 1;
5052       vec_oprnds0.create (1);
5053       if (op_type == ternary_op)
5054         vec_oprnds1.create (1);
5055     }
5056
5057   phis.create (vec_num);
5058   vect_defs.create (vec_num);
5059   if (!slp_node)
5060     vect_defs.quick_push (NULL_TREE);
5061
5062   for (j = 0; j < ncopies; j++)
5063     {
5064       if (j == 0 || !single_defuse_cycle)
5065         {
5066           for (i = 0; i < vec_num; i++)
5067             {
5068               /* Create the reduction-phi that defines the reduction
5069                  operand.  */
5070               new_phi = create_phi_node (vec_dest, loop->header);
5071               set_vinfo_for_stmt (new_phi,
5072                                   new_stmt_vec_info (new_phi, loop_vinfo,
5073                                                      NULL));
5074                if (j == 0 || slp_node)
5075                  phis.quick_push (new_phi);
5076             }
5077         }
5078
5079       if (code == COND_EXPR)
5080         {
5081           gcc_assert (!slp_node);
5082           vectorizable_condition (stmt, gsi, vec_stmt,
5083                                   PHI_RESULT (phis[0]),
5084                                   reduc_index, NULL);
5085           /* Multiple types are not supported for condition.  */
5086           break;
5087         }
5088
5089       /* Handle uses.  */
5090       if (j == 0)
5091         {
5092           op0 = ops[!reduc_index];
5093           if (op_type == ternary_op)
5094             {
5095               if (reduc_index == 0)
5096                 op1 = ops[2];
5097               else
5098                 op1 = ops[1];
5099             }
5100
5101           if (slp_node)
5102             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5103                                slp_node, -1);
5104           else
5105             {
5106               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5107                                                             stmt, NULL);
5108               vec_oprnds0.quick_push (loop_vec_def0);
5109               if (op_type == ternary_op)
5110                {
5111                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5112                                                                NULL);
5113                  vec_oprnds1.quick_push (loop_vec_def1);
5114                }
5115             }
5116         }
5117       else
5118         {
5119           if (!slp_node)
5120             {
5121               enum vect_def_type dt;
5122               gimple dummy_stmt;
5123               tree dummy;
5124
5125               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5126                                   &dummy_stmt, &dummy, &dt);
5127               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5128                                                               loop_vec_def0);
5129               vec_oprnds0[0] = loop_vec_def0;
5130               if (op_type == ternary_op)
5131                 {
5132                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5133                                       &dummy, &dt);
5134                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5135                                                                 loop_vec_def1);
5136                   vec_oprnds1[0] = loop_vec_def1;
5137                 }
5138             }
5139
5140           if (single_defuse_cycle)
5141             reduc_def = gimple_assign_lhs (new_stmt);
5142
5143           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5144         }
5145
5146       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5147         {
5148           if (slp_node)
5149             reduc_def = PHI_RESULT (phis[i]);
5150           else
5151             {
5152               if (!single_defuse_cycle || j == 0)
5153                 reduc_def = PHI_RESULT (new_phi);
5154             }
5155
5156           def1 = ((op_type == ternary_op)
5157                   ? vec_oprnds1[i] : NULL);
5158           if (op_type == binary_op)
5159             {
5160               if (reduc_index == 0)
5161                 expr = build2 (code, vectype_out, reduc_def, def0);
5162               else
5163                 expr = build2 (code, vectype_out, def0, reduc_def);
5164             }
5165           else
5166             {
5167               if (reduc_index == 0)
5168                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5169               else
5170                 {
5171                   if (reduc_index == 1)
5172                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5173                   else
5174                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5175                 }
5176             }
5177
5178           new_stmt = gimple_build_assign (vec_dest, expr);
5179           new_temp = make_ssa_name (vec_dest, new_stmt);
5180           gimple_assign_set_lhs (new_stmt, new_temp);
5181           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5182
5183           if (slp_node)
5184             {
5185               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5186               vect_defs.quick_push (new_temp);
5187             }
5188           else
5189             vect_defs[0] = new_temp;
5190         }
5191
5192       if (slp_node)
5193         continue;
5194
5195       if (j == 0)
5196         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5197       else
5198         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5199
5200       prev_stmt_info = vinfo_for_stmt (new_stmt);
5201       prev_phi_info = vinfo_for_stmt (new_phi);
5202     }
5203
5204   /* Finalize the reduction-phi (set its arguments) and create the
5205      epilog reduction code.  */
5206   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5207     {
5208       new_temp = gimple_assign_lhs (*vec_stmt);
5209       vect_defs[0] = new_temp;
5210     }
5211
5212   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5213                                     epilog_reduc_code, phis, reduc_index,
5214                                     double_reduc, slp_node);
5215
5216   phis.release ();
5217   vect_defs.release ();
5218   vec_oprnds0.release ();
5219   vec_oprnds1.release ();
5220
5221   return true;
5222 }
5223
5224 /* Function vect_min_worthwhile_factor.
5225
5226    For a loop where we could vectorize the operation indicated by CODE,
5227    return the minimum vectorization factor that makes it worthwhile
5228    to use generic vectors.  */
5229 int
5230 vect_min_worthwhile_factor (enum tree_code code)
5231 {
5232   switch (code)
5233     {
5234     case PLUS_EXPR:
5235     case MINUS_EXPR:
5236     case NEGATE_EXPR:
5237       return 4;
5238
5239     case BIT_AND_EXPR:
5240     case BIT_IOR_EXPR:
5241     case BIT_XOR_EXPR:
5242     case BIT_NOT_EXPR:
5243       return 2;
5244
5245     default:
5246       return INT_MAX;
5247     }
5248 }
5249
5250
5251 /* Function vectorizable_induction
5252
5253    Check if PHI performs an induction computation that can be vectorized.
5254    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5255    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5256    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5257
5258 bool
5259 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5260                         gimple *vec_stmt)
5261 {
5262   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5263   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5264   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5265   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5266   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5267   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5268   tree vec_def;
5269
5270   gcc_assert (ncopies >= 1);
5271   /* FORNOW. These restrictions should be relaxed.  */
5272   if (nested_in_vect_loop_p (loop, phi))
5273     {
5274       imm_use_iterator imm_iter;
5275       use_operand_p use_p;
5276       gimple exit_phi;
5277       edge latch_e;
5278       tree loop_arg;
5279
5280       if (ncopies > 1)
5281         {
5282           if (dump_enabled_p ())
5283             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5284                              "multiple types in nested loop.");
5285           return false;
5286         }
5287
5288       exit_phi = NULL;
5289       latch_e = loop_latch_edge (loop->inner);
5290       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5291       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5292         {
5293           if (!flow_bb_inside_loop_p (loop->inner,
5294                                       gimple_bb (USE_STMT (use_p))))
5295             {
5296               exit_phi = USE_STMT (use_p);
5297               break;
5298             }
5299         }
5300       if (exit_phi)
5301         {
5302           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5303           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5304                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5305             {
5306               if (dump_enabled_p ())
5307                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308                                  "inner-loop induction only used outside "
5309                                  "of the outer vectorized loop.");
5310               return false;
5311             }
5312         }
5313     }
5314
5315   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5316     return false;
5317
5318   /* FORNOW: SLP not supported.  */
5319   if (STMT_SLP_TYPE (stmt_info))
5320     return false;
5321
5322   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5323
5324   if (gimple_code (phi) != GIMPLE_PHI)
5325     return false;
5326
5327   if (!vec_stmt) /* transformation not required.  */
5328     {
5329       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5330       if (dump_enabled_p ())
5331         dump_printf_loc (MSG_NOTE, vect_location,
5332                          "=== vectorizable_induction ===");
5333       vect_model_induction_cost (stmt_info, ncopies);
5334       return true;
5335     }
5336
5337   /** Transform.  **/
5338
5339   if (dump_enabled_p ())
5340     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.");
5341
5342   vec_def = get_initial_def_for_induction (phi);
5343   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5344   return true;
5345 }
5346
5347 /* Function vectorizable_live_operation.
5348
5349    STMT computes a value that is used outside the loop.  Check if
5350    it can be supported.  */
5351
5352 bool
5353 vectorizable_live_operation (gimple stmt,
5354                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5355                              gimple *vec_stmt ATTRIBUTE_UNUSED)
5356 {
5357   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5358   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5359   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5360   int i;
5361   int op_type;
5362   tree op;
5363   tree def;
5364   gimple def_stmt;
5365   enum vect_def_type dt;
5366   enum tree_code code;
5367   enum gimple_rhs_class rhs_class;
5368
5369   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5370
5371   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5372     return false;
5373
5374   if (!is_gimple_assign (stmt))
5375     return false;
5376
5377   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5378     return false;
5379
5380   /* FORNOW. CHECKME. */
5381   if (nested_in_vect_loop_p (loop, stmt))
5382     return false;
5383
5384   code = gimple_assign_rhs_code (stmt);
5385   op_type = TREE_CODE_LENGTH (code);
5386   rhs_class = get_gimple_rhs_class (code);
5387   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5388   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5389
5390   /* FORNOW: support only if all uses are invariant.  This means
5391      that the scalar operations can remain in place, unvectorized.
5392      The original last scalar value that they compute will be used.  */
5393
5394   for (i = 0; i < op_type; i++)
5395     {
5396       if (rhs_class == GIMPLE_SINGLE_RHS)
5397         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5398       else
5399         op = gimple_op (stmt, i + 1);
5400       if (op
5401           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5402                                   &dt))
5403         {
5404           if (dump_enabled_p ())
5405             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406                              "use not simple.");
5407           return false;
5408         }
5409
5410       if (dt != vect_external_def && dt != vect_constant_def)
5411         return false;
5412     }
5413
5414   /* No transformation is required for the cases we currently support.  */
5415   return true;
5416 }
5417
5418 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5419
5420 static void
5421 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5422 {
5423   ssa_op_iter op_iter;
5424   imm_use_iterator imm_iter;
5425   def_operand_p def_p;
5426   gimple ustmt;
5427
5428   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5429     {
5430       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5431         {
5432           basic_block bb;
5433
5434           if (!is_gimple_debug (ustmt))
5435             continue;
5436
5437           bb = gimple_bb (ustmt);
5438
5439           if (!flow_bb_inside_loop_p (loop, bb))
5440             {
5441               if (gimple_debug_bind_p (ustmt))
5442                 {
5443                   if (dump_enabled_p ())
5444                     dump_printf_loc (MSG_NOTE, vect_location,
5445                                      "killing debug use");
5446
5447                   gimple_debug_bind_reset_value (ustmt);
5448                   update_stmt (ustmt);
5449                 }
5450               else
5451                 gcc_unreachable ();
5452             }
5453         }
5454     }
5455 }
5456
5457 /* Function vect_transform_loop.
5458
5459    The analysis phase has determined that the loop is vectorizable.
5460    Vectorize the loop - create vectorized stmts to replace the scalar
5461    stmts in the loop, and update the loop exit condition.  */
5462
5463 void
5464 vect_transform_loop (loop_vec_info loop_vinfo)
5465 {
5466   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5467   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5468   int nbbs = loop->num_nodes;
5469   gimple_stmt_iterator si;
5470   int i;
5471   tree ratio = NULL;
5472   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5473   bool grouped_store;
5474   bool slp_scheduled = false;
5475   unsigned int nunits;
5476   gimple stmt, pattern_stmt;
5477   gimple_seq pattern_def_seq = NULL;
5478   gimple_stmt_iterator pattern_def_si = gsi_none ();
5479   bool transform_pattern_stmt = false;
5480   bool check_profitability = false;
5481   int th;
5482   /* Record number of iterations before we started tampering with the profile. */
5483   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5484
5485   if (dump_enabled_p ())
5486     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
5487
5488   /* If profile is inprecise, we have chance to fix it up.  */
5489   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5490     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5491
5492   /* Use the more conservative vectorization threshold.  If the number
5493      of iterations is constant assume the cost check has been performed
5494      by our caller.  If the threshold makes all loops profitable that
5495      run at least the vectorization factor number of times checking
5496      is pointless, too.  */
5497   th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
5498          * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
5499   th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo));
5500   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5501       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5502     {
5503       if (dump_enabled_p ())
5504         dump_printf_loc (MSG_NOTE, vect_location,
5505                          "Profitability threshold is %d loop iterations.", th);
5506       check_profitability = true;
5507     }
5508
5509   /* Peel the loop if there are data refs with unknown alignment.
5510      Only one data ref with unknown store is allowed.  */
5511
5512   if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5513     {
5514       vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
5515       check_profitability = false;
5516     }
5517
5518   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5519       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5520     {
5521       vect_loop_versioning (loop_vinfo, th, check_profitability);
5522       check_profitability = false;
5523     }
5524
5525   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5526      compile time constant), or it is a constant that doesn't divide by the
5527      vectorization factor, then an epilog loop needs to be created.
5528      We therefore duplicate the loop: the original loop will be vectorized,
5529      and will compute the first (n/VF) iterations.  The second copy of the loop
5530      will remain scalar and will compute the remaining (n%VF) iterations.
5531      (VF is the vectorization factor).  */
5532
5533   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5534        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5535            && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
5536        || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5537     vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
5538                                     th, check_profitability);
5539   else
5540     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5541                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5542
5543   /* 1) Make sure the loop header has exactly two entries
5544      2) Make sure we have a preheader basic block.  */
5545
5546   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5547
5548   split_edge (loop_preheader_edge (loop));
5549
5550   /* FORNOW: the vectorizer supports only loops which body consist
5551      of one basic block (header + empty latch). When the vectorizer will
5552      support more involved loop forms, the order by which the BBs are
5553      traversed need to be reconsidered.  */
5554
5555   for (i = 0; i < nbbs; i++)
5556     {
5557       basic_block bb = bbs[i];
5558       stmt_vec_info stmt_info;
5559       gimple phi;
5560
5561       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
5562         {
5563           phi = gsi_stmt (si);
5564           if (dump_enabled_p ())
5565             {
5566               dump_printf_loc (MSG_NOTE, vect_location,
5567                                "------>vectorizing phi: ");
5568               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
5569             }
5570           stmt_info = vinfo_for_stmt (phi);
5571           if (!stmt_info)
5572             continue;
5573
5574           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5575             vect_loop_kill_debug_uses (loop, phi);
5576
5577           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5578               && !STMT_VINFO_LIVE_P (stmt_info))
5579             continue;
5580
5581           if (STMT_VINFO_VECTYPE (stmt_info)
5582               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5583                   != (unsigned HOST_WIDE_INT) vectorization_factor)
5584               && dump_enabled_p ())
5585             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.");
5586
5587           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5588             {
5589               if (dump_enabled_p ())
5590                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.");
5591               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5592             }
5593         }
5594
5595       pattern_stmt = NULL;
5596       for (si = gsi_start_bb (bb); !gsi_end_p (si) || transform_pattern_stmt;)
5597         {
5598           bool is_store;
5599
5600           if (transform_pattern_stmt)
5601             stmt = pattern_stmt;
5602           else
5603             stmt = gsi_stmt (si);
5604
5605           if (dump_enabled_p ())
5606             {
5607               dump_printf_loc (MSG_NOTE, vect_location,
5608                                "------>vectorizing statement: ");
5609               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5610             }
5611
5612           stmt_info = vinfo_for_stmt (stmt);
5613
5614           /* vector stmts created in the outer-loop during vectorization of
5615              stmts in an inner-loop may not have a stmt_info, and do not
5616              need to be vectorized.  */
5617           if (!stmt_info)
5618             {
5619               gsi_next (&si);
5620               continue;
5621             }
5622
5623           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5624             vect_loop_kill_debug_uses (loop, stmt);
5625
5626           if (!STMT_VINFO_RELEVANT_P (stmt_info)
5627               && !STMT_VINFO_LIVE_P (stmt_info))
5628             {
5629               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5630                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5631                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5632                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5633                 {
5634                   stmt = pattern_stmt;
5635                   stmt_info = vinfo_for_stmt (stmt);
5636                 }
5637               else
5638                 {
5639                   gsi_next (&si);
5640                   continue;
5641                 }
5642             }
5643           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5644                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
5645                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5646                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5647             transform_pattern_stmt = true;
5648
5649           /* If pattern statement has def stmts, vectorize them too.  */
5650           if (is_pattern_stmt_p (stmt_info))
5651             {
5652               if (pattern_def_seq == NULL)
5653                 {
5654                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
5655                   pattern_def_si = gsi_start (pattern_def_seq);
5656                 }
5657               else if (!gsi_end_p (pattern_def_si))
5658                 gsi_next (&pattern_def_si);
5659               if (pattern_def_seq != NULL)
5660                 {
5661                   gimple pattern_def_stmt = NULL;
5662                   stmt_vec_info pattern_def_stmt_info = NULL;
5663
5664                   while (!gsi_end_p (pattern_def_si))
5665                     {
5666                       pattern_def_stmt = gsi_stmt (pattern_def_si);
5667                       pattern_def_stmt_info
5668                         = vinfo_for_stmt (pattern_def_stmt);
5669                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
5670                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
5671                         break;
5672                       gsi_next (&pattern_def_si);
5673                     }
5674
5675                   if (!gsi_end_p (pattern_def_si))
5676                     {
5677                       if (dump_enabled_p ())
5678                         {
5679                           dump_printf_loc (MSG_NOTE, vect_location,
5680                                            "==> vectorizing pattern def "
5681                                            "stmt: ");
5682                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
5683                                             pattern_def_stmt, 0);
5684                         }
5685
5686                       stmt = pattern_def_stmt;
5687                       stmt_info = pattern_def_stmt_info;
5688                     }
5689                   else
5690                     {
5691                       pattern_def_si = gsi_none ();
5692                       transform_pattern_stmt = false;
5693                     }
5694                 }
5695               else
5696                 transform_pattern_stmt = false;
5697             }
5698
5699           gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5700           nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
5701                                                STMT_VINFO_VECTYPE (stmt_info));
5702           if (!STMT_SLP_TYPE (stmt_info)
5703               && nunits != (unsigned int) vectorization_factor
5704               && dump_enabled_p ())
5705             /* For SLP VF is set according to unrolling factor, and not to
5706                vector size, hence for SLP this print is not valid.  */
5707             dump_printf_loc (MSG_NOTE, vect_location,
5708                              "multiple-types.");
5709
5710           /* SLP. Schedule all the SLP instances when the first SLP stmt is
5711              reached.  */
5712           if (STMT_SLP_TYPE (stmt_info))
5713             {
5714               if (!slp_scheduled)
5715                 {
5716                   slp_scheduled = true;
5717
5718                   if (dump_enabled_p ())
5719                     dump_printf_loc (MSG_NOTE, vect_location,
5720                                      "=== scheduling SLP instances ===");
5721
5722                   vect_schedule_slp (loop_vinfo, NULL);
5723                 }
5724
5725               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
5726               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
5727                 {
5728                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5729                     {
5730                       pattern_def_seq = NULL;
5731                       gsi_next (&si);
5732                     }
5733                   continue;
5734                 }
5735             }
5736
5737           /* -------- vectorize statement ------------ */
5738           if (dump_enabled_p ())
5739             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.");
5740
5741           grouped_store = false;
5742           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
5743           if (is_store)
5744             {
5745               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5746                 {
5747                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
5748                      interleaving chain was completed - free all the stores in
5749                      the chain.  */
5750                   gsi_next (&si);
5751                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
5752                   continue;
5753                 }
5754               else
5755                 {
5756                   /* Free the attached stmt_vec_info and remove the stmt.  */
5757                   gimple store = gsi_stmt (si);
5758                   free_stmt_vec_info (store);
5759                   unlink_stmt_vdef (store);
5760                   gsi_remove (&si, true);
5761                   release_defs (store);
5762                   continue;
5763                 }
5764             }
5765
5766           if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
5767             {
5768               pattern_def_seq = NULL;
5769               gsi_next (&si);
5770             }
5771         }                       /* stmts in BB */
5772     }                           /* BBs in loop */
5773
5774   slpeel_make_loop_iterate_ntimes (loop, ratio);
5775
5776   /* Reduce loop iterations by the vectorization factor.  */
5777   scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
5778                       expected_iterations / vectorization_factor);
5779   loop->nb_iterations_upper_bound
5780     = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
5781                                             FLOOR_DIV_EXPR);
5782   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5783       && loop->nb_iterations_upper_bound != double_int_zero)
5784     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
5785   if (loop->any_estimate)
5786     {
5787       loop->nb_iterations_estimate
5788         = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
5789                                              FLOOR_DIV_EXPR);
5790        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
5791            && loop->nb_iterations_estimate != double_int_zero)
5792          loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
5793     }
5794
5795   /* The memory tags and pointers in vectorized statements need to
5796      have their SSA forms updated.  FIXME, why can't this be delayed
5797      until all the loops have been transformed?  */
5798   update_ssa (TODO_update_ssa);
5799
5800   if (dump_enabled_p ())
5801     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, "LOOP VECTORIZED.");
5802   if (loop->inner && dump_enabled_p ())
5803     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
5804                      "OUTER LOOP VECTORIZED.");
5805 }