gcc/tree-vect-loop.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   4    Ira Rosen <irar@il.ibm.com>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "dumpfile.h"
  26 #include "backend.h"
  27 #include "cfghooks.h"
  28 #include "tree.h"
  29 #include "gimple.h"
  30 #include "rtl.h"
  31 #include "ssa.h"
  32 #include "alias.h"
  33 #include "fold-const.h"
  34 #include "stor-layout.h"
  35 #include "cfganal.h"
  36 #include "gimple-pretty-print.h"
  37 #include "internal-fn.h"
  38 #include "gimplify.h"
  39 #include "gimple-iterator.h"
  40 #include "gimplify-me.h"
  41 #include "tree-ssa-loop-ivopts.h"
  42 #include "tree-ssa-loop-manip.h"
  43 #include "tree-ssa-loop-niter.h"
  44 #include "tree-pass.h"
  45 #include "cfgloop.h"
  46 #include "flags.h"
  47 #include "insn-config.h"
  48 #include "expmed.h"
  49 #include "dojump.h"
  50 #include "explow.h"
  51 #include "calls.h"
  52 #include "emit-rtl.h"
  53 #include "varasm.h"
  54 #include "stmt.h"
  55 #include "expr.h"
  56 #include "recog.h"
  57 #include "insn-codes.h"
  58 #include "optabs.h"
  59 #include "params.h"
  60 #include "diagnostic-core.h"
  61 #include "tree-chrec.h"
  62 #include "tree-scalar-evolution.h"
  63 #include "tree-vectorizer.h"
  64 #include "target.h"
  65
  66 /* Loop Vectorization Pass.
  67
  68    This pass tries to vectorize loops.
  69
  70    For example, the vectorizer transforms the following simple loop:
  71
  72         short a[N]; short b[N]; short c[N]; int i;
  73
  74         for (i=0; i<N; i++){
  75           a[i] = b[i] + c[i];
  76         }
  77
  78    as if it was manually vectorized by rewriting the source code into:
  79
  80         typedef int __attribute__((mode(V8HI))) v8hi;
  81         short a[N];  short b[N]; short c[N];   int i;
  82         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  83         v8hi va, vb, vc;
  84
  85         for (i=0; i<N/8; i++){
  86           vb = pb[i];
  87           vc = pc[i];
  88           va = vb + vc;
  89           pa[i] = va;
  90         }
  91
  92         The main entry to this pass is vectorize_loops(), in which
  93    the vectorizer applies a set of analyses on a given set of loops,
  94    followed by the actual vectorization transformation for the loops that
  95    had successfully passed the analysis phase.
  96         Throughout this pass we make a distinction between two types of
  97    data: scalars (which are represented by SSA_NAMES), and memory references
  98    ("data-refs").  These two types of data require different handling both
  99    during analysis and transformation.  The types of data-refs that the
 100    vectorizer currently supports are ARRAY_REFs whose base is an array DECL
 101    (not a pointer), and INDIRECT_REFs through pointers; both array and pointer
 102    accesses are required to have a simple (consecutive) access pattern.
 103
 104    Analysis phase:
 105    ===============
 106         The driver for the analysis phase is vect_analyze_loop().
 107    It applies a set of analyses, some of which rely on the scalar evolution
 108    analyzer (scev) developed by Sebastian Pop.
 109
 110         During the analysis phase the vectorizer records some information
 111    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
 112    loop, as well as general information about the loop as a whole, which is
 113    recorded in a "loop_vec_info" struct attached to each loop.
 114
 115    Transformation phase:
 116    =====================
 117         The loop transformation phase scans all the stmts in the loop, and
 118    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 119    the loop that needs to be vectorized.  It inserts the vector code sequence
 120    just before the scalar stmt S, and records a pointer to the vector code
 121    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 122    attached to S).  This pointer will be used for the vectorization of following
 123    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 124    otherwise, we rely on dead code elimination for removing it.
 125
 126         For example, say stmt S1 was vectorized into stmt VS1:
 127
 128    VS1: vb = px[i];
 129    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 130    S2:  a = b;
 131
 132    To vectorize stmt S2, the vectorizer first finds the stmt that defines
 133    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 134    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
 135    resulting sequence would be:
 136
 137    VS1: vb = px[i];
 138    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
 139    VS2: va = vb;
 140    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 141
 142         Operands that are not SSA_NAMEs are data-refs that appear in
 143    load/store operations (like 'x[i]' in S1), and are handled differently.
 144
 145    Target modeling:
 146    =================
 147         Currently the only target specific information that is used is the
 148    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 149    Targets that can support different sizes of vectors, for now will need
 150    to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
 151    flexibility will be added in the future.
 152
 153         Since we only vectorize operations whose vector form can be
 154    expressed using existing tree codes, to verify that an operation is
 155    supported, the vectorizer checks the relevant optab at the relevant
 156    machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
 157    the value found is CODE_FOR_nothing, then there's no target support, and
 158    we can't vectorize the stmt.
 159
 160    For additional information on this project see:
 161    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 162 */
 163 /* Forward declaration of a file-local (static) function.  */
 164 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 165
 166 /* Function vect_determine_vectorization_factor
 167
 168    Determine the vectorization factor (VF): the number of data elements that
 169    are operated upon in parallel in a single iteration of the vectorized loop.
 170    E.g., for a loop operating on 4-byte elements on a target with 16-byte
 171    vectors, VF is 4, since 4 elements fit in a single vector register.
 172
 173    VF is the largest number of vector units (TYPE_VECTOR_SUBPARTS) needed by
 174    any relevant PHI or relevant/live stmt; for a stmt it is derived from the
 175    stmt's smallest scalar type.  STMT_VINFO_VECTYPE is set where still unset.
 176    Return false for an unsupported data type, an irregular or vector stmt,
 177    vector types of different sizes within one stmt, or VF <= 1; otherwise
 178    store VF in LOOP_VINFO_VECT_FACTOR (LOOP_VINFO) and return true.
 179
 180    VF is also the factor by which the loop iterations are strip-mined, e.g.:
 181    original loop:
 182         for (i=0; i<N; i++){
 183           a[i] = b[i] + c[i];
 184         }
 185    vectorized loop:
 186         for (i=0; i<N; i+=VF){
 187           a[i:VF] = b[i:VF] + c[i:VF];
 188         }
 189 */
 190
 191 static bool
 192 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
 193 {
 194   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 195   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
 196   int nbbs = loop->num_nodes;
 197   unsigned int vectorization_factor = 0;
 198   tree scalar_type;
 199   gphi *phi;
 200   tree vectype;
 201   unsigned int nunits;
 202   stmt_vec_info stmt_info;
 203   int i;
 204   HOST_WIDE_INT dummy;
 205   gimple stmt, pattern_stmt = NULL;
 206   gimple_seq pattern_def_seq = NULL;
 207   gimple_stmt_iterator pattern_def_si = gsi_none ();
 208   bool analyze_pattern_stmt = false;
 209
 210   if (dump_enabled_p ())
 211     dump_printf_loc (MSG_NOTE, vect_location,
 212                      "=== vect_determine_vectorization_factor ===\n");
 213   /* Visit each basic block of the loop: its PHIs first, then its stmts.  */
 214   for (i = 0; i < nbbs; i++)
 215     {
 216       basic_block bb = bbs[i];
 217       /* Only relevant PHIs take part in the VF computation.  */
 218       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
 219            gsi_next (&si))
 220         {
 221           phi = si.phi ();
 222           stmt_info = vinfo_for_stmt (phi);
 223           if (dump_enabled_p ())
 224             {
 225               dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
 226               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 227               dump_printf (MSG_NOTE, "\n");
 228             }
 229
 230           gcc_assert (stmt_info);
 231
 232           if (STMT_VINFO_RELEVANT_P (stmt_info))
 233             {
 234               gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
 235               scalar_type = TREE_TYPE (PHI_RESULT (phi));
 236
 237               if (dump_enabled_p ())
 238                 {
 239                   dump_printf_loc (MSG_NOTE, vect_location,
 240                                    "get vectype for scalar type:  ");
 241                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 242                   dump_printf (MSG_NOTE, "\n");
 243                 }
 244
 245               vectype = get_vectype_for_scalar_type (scalar_type);
 246               if (!vectype)
 247                 {
 248                   if (dump_enabled_p ())
 249                     {
 250                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 251                                        "not vectorized: unsupported "
 252                                        "data-type ");
 253                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 254                                          scalar_type);
 255                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 256                     }
 257                   return false;
 258                 }
 259               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 260
 261               if (dump_enabled_p ())
 262                 {
 263                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 264                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 265                   dump_printf (MSG_NOTE, "\n");
 266                 }
 267
 268               nunits = TYPE_VECTOR_SUBPARTS (vectype);
 269               if (dump_enabled_p ())
 270                 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
 271                                  nunits);
 272
 273               if (!vectorization_factor
 274                   || (nunits > vectorization_factor))
 275                 vectorization_factor = nunits;
 276             }
 277         }
 278       /* SI is advanced by hand, so one stmt can be visited several times.  */
 279       for (gimple_stmt_iterator si = gsi_start_bb (bb);
 280            !gsi_end_p (si) || analyze_pattern_stmt;)
 281         {
 282           tree vf_vectype;
 283           /* A pattern stmt left pending by the previous visit goes first.  */
 284           if (analyze_pattern_stmt)
 285             stmt = pattern_stmt;
 286           else
 287             stmt = gsi_stmt (si);
 288
 289           stmt_info = vinfo_for_stmt (stmt);
 290
 291           if (dump_enabled_p ())
 292             {
 293               dump_printf_loc (MSG_NOTE, vect_location,
 294                                "==> examining statement: ");
 295               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 296               dump_printf (MSG_NOTE, "\n");
 297             }
 298
 299           gcc_assert (stmt_info);
 300
 301           /* Skip stmts which do not need to be vectorized.  */
 302           if ((!STMT_VINFO_RELEVANT_P (stmt_info)
 303                && !STMT_VINFO_LIVE_P (stmt_info))
 304               || gimple_clobber_p (stmt))
 305             {
 306               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 307                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 308                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 309                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 310                 {
 311                   stmt = pattern_stmt;
 312                   stmt_info = vinfo_for_stmt (pattern_stmt);
 313                   if (dump_enabled_p ())
 314                     {
 315                       dump_printf_loc (MSG_NOTE, vect_location,
 316                                        "==> examining pattern statement: ");
 317                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
 318                       dump_printf (MSG_NOTE, "\n");
 319                     }
 320                 }
 321               else
 322                 {
 323                   if (dump_enabled_p ())
 324                     dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
 325                   gsi_next (&si);
 326                   continue;
 327                 }
 328             }
 329           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
 330                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
 331                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
 332                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
 333             analyze_pattern_stmt = true;
 334
 335           /* If a pattern statement has def stmts, analyze them too.  */
 336           if (is_pattern_stmt_p (stmt_info))
 337             {
 338               if (pattern_def_seq == NULL)
 339                 {
 340                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
 341                   pattern_def_si = gsi_start (pattern_def_seq);
 342                 }
 343               else if (!gsi_end_p (pattern_def_si))
 344                 gsi_next (&pattern_def_si);
 345               if (pattern_def_seq != NULL)
 346                 {
 347                   gimple pattern_def_stmt = NULL;
 348                   stmt_vec_info pattern_def_stmt_info = NULL;
 349
 350                   while (!gsi_end_p (pattern_def_si))
 351                     {
 352                       pattern_def_stmt = gsi_stmt (pattern_def_si);
 353                       pattern_def_stmt_info
 354                         = vinfo_for_stmt (pattern_def_stmt);
 355                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
 356                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
 357                         break;
 358                       gsi_next (&pattern_def_si);
 359                     }
 360
 361                   if (!gsi_end_p (pattern_def_si))
 362                     {
 363                       if (dump_enabled_p ())
 364                         {
 365                           dump_printf_loc (MSG_NOTE, vect_location,
 366                                            "==> examining pattern def stmt: ");
 367                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
 368                                             pattern_def_stmt, 0);
 369                           dump_printf (MSG_NOTE, "\n");
 370                         }
 371
 372                       stmt = pattern_def_stmt;
 373                       stmt_info = pattern_def_stmt_info;
 374                     }
 375                   else
 376                     {
 377                       pattern_def_si = gsi_none ();
 378                       analyze_pattern_stmt = false;
 379                     }
 380                 }
 381               else
 382                 analyze_pattern_stmt = false;
 383             }
 384           /* No lhs: calls are skipped, any other stmt is rejected.  */
 385           if (gimple_get_lhs (stmt) == NULL_TREE
 386               /* MASK_STORE has no lhs, but is ok.  */
 387               && (!is_gimple_call (stmt)
 388                   || !gimple_call_internal_p (stmt)
 389                   || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
 390             {
 391               if (is_gimple_call (stmt))
 392                 {
 393                   /* Ignore calls with no lhs.  These must be calls to
 394                      #pragma omp simd functions, and what vectorization factor
 395                      it really needs can't be determined until
 396                      vectorizable_simd_clone_call.  */
 397                   if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 398                     {
 399                       pattern_def_seq = NULL;
 400                       gsi_next (&si);
 401                     }
 402                   continue;
 403                 }
 404               if (dump_enabled_p ())
 405                 {
 406                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 407                                    "not vectorized: irregular stmt.");
 408                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,  TDF_SLIM, stmt,
 409                                     0);
 410                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 411                 }
 412               return false;
 413             }
 414           /* A stmt whose type already has a vector mode is rejected.  */
 415           if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 416             {
 417               if (dump_enabled_p ())
 418                 {
 419                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 420                                    "not vectorized: vector stmt in loop:");
 421                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
 422                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 423                 }
 424               return false;
 425             }
 426           /* Reuse the vectype recorded for the stmt, or compute one.  */
 427           if (STMT_VINFO_VECTYPE (stmt_info))
 428             {
 429               /* The only case when a vectype had been already set is for stmts
 430                  that contain a dataref, or for "pattern-stmts" (stmts
 431                  generated by the vectorizer to represent/replace a certain
 432                  idiom).  */
 433               gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
 434                           || is_pattern_stmt_p (stmt_info)
 435                           || !gsi_end_p (pattern_def_si));
 436               vectype = STMT_VINFO_VECTYPE (stmt_info);
 437             }
 438           else
 439             {
 440               gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
 441               if (is_gimple_call (stmt)
 442                   && gimple_call_internal_p (stmt)
 443                   && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
 444                 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
 445               else
 446                 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 447               if (dump_enabled_p ())
 448                 {
 449                   dump_printf_loc (MSG_NOTE, vect_location,
 450                                    "get vectype for scalar type:  ");
 451                   dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 452                   dump_printf (MSG_NOTE, "\n");
 453                 }
 454               vectype = get_vectype_for_scalar_type (scalar_type);
 455               if (!vectype)
 456                 {
 457                   if (dump_enabled_p ())
 458                     {
 459                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 460                                        "not vectorized: unsupported "
 461                                        "data-type ");
 462                       dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 463                                          scalar_type);
 464                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 465                     }
 466                   return false;
 467                 }
 468
 469               STMT_VINFO_VECTYPE (stmt_info) = vectype;
 470
 471               if (dump_enabled_p ())
 472                 {
 473                   dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 474                   dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
 475                   dump_printf (MSG_NOTE, "\n");
 476                 }
 477             }
 478
 479           /* The vectorization factor is according to the smallest
 480              scalar type (or the largest vector size, but we only
 481              support one vector size per loop).  */
 482           scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
 483                                                        &dummy);
 484           if (dump_enabled_p ())
 485             {
 486               dump_printf_loc (MSG_NOTE, vect_location,
 487                                "get vectype for scalar type:  ");
 488               dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
 489               dump_printf (MSG_NOTE, "\n");
 490             }
 491           vf_vectype = get_vectype_for_scalar_type (scalar_type);
 492           if (!vf_vectype)
 493             {
 494               if (dump_enabled_p ())
 495                 {
 496                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 497                                    "not vectorized: unsupported data-type ");
 498                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 499                                      scalar_type);
 500                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 501                 }
 502               return false;
 503             }
 504           /* The stmt's two vector types must have equal mode sizes.  */
 505           if ((GET_MODE_SIZE (TYPE_MODE (vectype))
 506                != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
 507             {
 508               if (dump_enabled_p ())
 509                 {
 510                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 511                                    "not vectorized: different sized vector "
 512                                    "types in statement, ");
 513                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 514                                      vectype);
 515                   dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
 516                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
 517                                      vf_vectype);
 518                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
 519                 }
 520               return false;
 521             }
 522
 523           if (dump_enabled_p ())
 524             {
 525               dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
 526               dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
 527               dump_printf (MSG_NOTE, "\n");
 528             }
 529
 530           nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
 531           if (dump_enabled_p ())
 532             dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
 533           if (!vectorization_factor
 534               || (nunits > vectorization_factor))
 535             vectorization_factor = nunits;
 536           /* Advance SI unless a pattern stmt or def stmt is pending.  */
 537           if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
 538             {
 539               pattern_def_seq = NULL;
 540               gsi_next (&si);
 541             }
 542         }
 543     }
 544
 545   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 546   if (dump_enabled_p ())
 547     dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
 548                      vectorization_factor);
 549   if (vectorization_factor <= 1)
 550     {
 551       if (dump_enabled_p ())
 552         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 553                          "not vectorized: unsupported data-type\n");
 554       return false;
 555     }
 556   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
 557
 558   return true;
 559 }
 560
 561
 562 /* Function vect_is_simple_iv_evolution.
 563
 564    Return true if ACCESS_FN has a "simple" evolution in loop LOOP_NB.  FORNOW
 565    that requires an evolution part that is not itself a chrec and a step of
 566    a supported kind; *INIT and *STEP are stored before the step check.  */
 567
 568 static bool
 569 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 570                              tree * step)
 571 {
 572   tree step_expr = evolution_part_in_loop_num (access_fn, loop_nb);
 573
 574   /* No evolution in this loop, or an evolution that is a polynomial of
 575      degree >= 2, means the evolution function is not "simple".  */
 576   if (step_expr == NULL_TREE || tree_is_chrec (step_expr))
 577     return false;
 578
 579   tree init_expr
 580     = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
 581
 582   if (dump_enabled_p ())
 583     {
 584       dump_printf_loc (MSG_NOTE, vect_location, "step: ");
 585       dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
 586       dump_printf (MSG_NOTE, ",  init: ");
 587       dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
 588       dump_printf (MSG_NOTE, "\n");
 589     }
 590
 591   *init = init_expr;
 592   *step = step_expr;
 593
 594   /* OK: INTEGER_CST; REAL_CST if flag_associative_math; SSA_NAME not defined
 595      in the loop with integral type (or float if flag_associative_math).  */
 596   bool step_ok = (TREE_CODE (step_expr) == INTEGER_CST
 597                   || (TREE_CODE (step_expr) == REAL_CST
 598                       && flag_associative_math));
 599   if (!step_ok && TREE_CODE (step_expr) == SSA_NAME)
 600     {
 601       basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr));
 602       tree step_type = TREE_TYPE (step_expr);
 603       bool def_in_loop
 604         = def_bb && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), def_bb);
 605       step_ok = (!def_in_loop
 606                  && (INTEGRAL_TYPE_P (step_type)
 607                      || (SCALAR_FLOAT_TYPE_P (step_type)
 608                          && flag_associative_math)));
 609     }
 610
 611   if (step_ok)
 612     return true;
 613
 614   if (dump_enabled_p ())
 615     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 616                      "step unknown.\n");
 617   return false;
 618 }
 619
 620 /* Function vect_analyze_scalar_cycles_1.
 621
 622    Classify the non-virtual PHIs in the header of LOOP: inductions first,
 623    then reductions, double reductions and nested cycles; the result is kept
 624    in STMT_VINFO_DEF_TYPE.  LOOP_VINFO represents the loop being considered
 625    for vectorization (can be LOOP, or an outer-loop enclosing LOOP).  */
 626
 627 static void
 628 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 629 {
 630   basic_block bb = loop->header;
 631   tree init, step;
 632   auto_vec<gimple, 64> worklist;
 633   gphi_iterator gsi;
 634   bool double_reduc;
 635
 636   if (dump_enabled_p ())
 637     dump_printf_loc (MSG_NOTE, vect_location,
 638                      "=== vect_analyze_scalar_cycles ===\n");
 639
 640   /* First - identify all inductions.  Reduction detection assumes that all the
 641      inductions have been identified, therefore, this order must not be
 642      changed.  */
 643   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 644     {
 645       gphi *phi = gsi.phi ();
 646       tree access_fn = NULL;
 647       tree def = PHI_RESULT (phi);
 648       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 649
 650       if (dump_enabled_p ())
 651         {
 652           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 653           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 654           dump_printf (MSG_NOTE, "\n");
 655         }
 656
 657       /* Skip virtual PHIs.  The data dependences that are associated with
 658          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
 659       if (virtual_operand_p (def))
 660         continue;
 661       /* Start as unknown; refined below or in the second phase.  */
 662       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
 663
 664       /* Analyze the evolution function.  */
 665       access_fn = analyze_scalar_evolution (loop, def);
 666       if (access_fn)
 667         {
 668           STRIP_NOPS (access_fn);
 669           if (dump_enabled_p ())
 670             {
 671               dump_printf_loc (MSG_NOTE, vect_location,
 672                                "Access function of PHI: ");
 673               dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
 674               dump_printf (MSG_NOTE, "\n");
 675             }
 676           STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
 677             = evolution_part_in_loop_num (access_fn, loop->num);
 678         }
 679       /* PHIs that are not simple inductions are queued for phase two.  */
 680       if (!access_fn
 681           || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
 682           || (LOOP_VINFO_LOOP (loop_vinfo) != loop
 683               && TREE_CODE (step) != INTEGER_CST))
 684         {
 685           worklist.safe_push (phi);
 686           continue;
 687         }
 688
 689       gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);
 690
 691       if (dump_enabled_p ())
 692         dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
 693       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
 694     }
 695
 696
 697   /* Second - identify all reductions and nested cycles.  */
 698   while (worklist.length () > 0)
 699     {
 700       gimple phi = worklist.pop ();
 701       tree def = PHI_RESULT (phi);
 702       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
 703       gimple reduc_stmt;
 704       bool nested_cycle;
 705
 706       if (dump_enabled_p ())
 707         {
 708           dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
 709           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
 710           dump_printf (MSG_NOTE, "\n");
 711         }
 712
 713       gcc_assert (!virtual_operand_p (def)
 714                   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 715       /* LOOP is a nested cycle when it is not the loop being vectorized.  */
 716       nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
 717       reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
 718                                                 &double_reduc, false);
 719       if (reduc_stmt)
 720         {
 721           if (double_reduc)
 722             {
 723               if (dump_enabled_p ())
 724                 dump_printf_loc (MSG_NOTE, vect_location,
 725                                  "Detected double reduction.\n");
 726
 727               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
 728               STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 729                                                     vect_double_reduction_def;
 730             }
 731           else
 732             {
 733               if (nested_cycle)
 734                 {
 735                   if (dump_enabled_p ())
 736                     dump_printf_loc (MSG_NOTE, vect_location,
 737                                      "Detected vectorizable nested cycle.\n");
 738
 739                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
 740                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 741                                                              vect_nested_cycle;
 742                 }
 743               else
 744                 {
 745                   if (dump_enabled_p ())
 746                     dump_printf_loc (MSG_NOTE, vect_location,
 747                                      "Detected reduction.\n");
 748
 749                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
 750                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
 751                                                            vect_reduction_def;
 752                   /* Store the reduction cycles for possible vectorization in
 753                      loop-aware SLP.  */
 754                   LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
 755                 }
 756             }
 757         }
 758       else
 759         if (dump_enabled_p ())
 760           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 761                            "Unknown def-use cycle pattern.\n");
 762     }
 763 }
 764
 765
 766 /* Function vect_analyze_scalar_cycles.
 767
 768    Examine the cross iteration def-use cycles of scalar variables, by
 769    analyzing the loop-header PHIs of scalar variables.  Classify each
 770    cycle as one of the following: invariant, induction, reduction, unknown.
 771    We do that for the loop represented by LOOP_VINFO, and also to its
 772    inner-loop, if exists.
 773    Examples for scalar cycles:
 774
 775    Example1: reduction:
 776
 777               loop1:
 778               for (i=0; i<N; i++)
 779                  sum += a[i];
 780
 781    Example2: induction:
 782
 783               loop2:
 784               for (i=0; i<N; i++)
 785                  a[i] = i;  */
 786
 787 static void
 788 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
 789 {
 790   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 791
 792   vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
 793
 794   /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
 795      Reductions in such inner-loop therefore have different properties than
 796      the reductions in the nest that gets vectorized:
 797      1. When vectorized, they are executed in the same order as in the original
 798         scalar loop, so we can't change the order of computation when
 799         vectorizing them.
 800      2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
 801         current checks are too strict.  */
 802
 803   if (loop->inner)
 804     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 805 }
 806
 807 /* Transfer group and reduction information from STMT to its pattern stmt.  */
 808
 809 static void
 810 vect_fixup_reduc_chain (gimple stmt)
 811 {
 812   gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 813   gimple stmtp;
 814   gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
 815               && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
 816   GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
 817   do
 818     {
 819       stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 820       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
 821       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
 822       if (stmt)
 823         GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
 824           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
 825     }
 826   while (stmt);
 827   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
 828 }
 829
 830 /* Fixup scalar cycles that now have their stmts detected as patterns.  */
 831
 832 static void
 833 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
 834 {
 835   gimple first;
 836   unsigned i;
 837
 838   FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
 839     if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
 840       {
 841         vect_fixup_reduc_chain (first);
 842         LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
 843           = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
 844       }
 845 }
 846
 847 /* Function vect_get_loop_niters.
 848
 849    Determine how many iterations the loop is executed and place it
 850    in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
 851    in NUMBER_OF_ITERATIONSM1.
 852
 853    Return the loop exit condition.  */
 854
 855
 856 static gcond *
 857 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
 858                       tree *number_of_iterationsm1)
 859 {
 860   tree niters;
 861
 862   if (dump_enabled_p ())
 863     dump_printf_loc (MSG_NOTE, vect_location,
 864                      "=== get_loop_niters ===\n");
 865
 866   niters = number_of_latch_executions (loop);
 867   *number_of_iterationsm1 = niters;
 868
 869   /* We want the number of loop header executions which is the number
 870      of latch executions plus one.
 871      ???  For UINT_MAX latch executions this number overflows to zero
 872      for loops like do { n++; } while (n != 0);  */
 873   if (niters && !chrec_contains_undetermined (niters))
 874     niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
 875                           build_int_cst (TREE_TYPE (niters), 1));
 876   *number_of_iterations = niters;
 877
 878   return get_loop_exit_condition (loop);
 879 }
 880
 881
 882 /* Function bb_in_loop_p
 883
 884    Used as predicate for dfs order traversal of the loop bbs.  */
 885
 886 static bool
 887 bb_in_loop_p (const_basic_block bb, const void *data)
 888 {
 889   const struct loop *const loop = (const struct loop *)data;
 890   if (flow_bb_inside_loop_p (loop, bb))
 891     return true;
 892   return false;
 893 }
 894
 895
 896 /* Function new_loop_vec_info.
 897
 898    Create and initialize a new loop_vec_info struct for LOOP, as well as
 899    stmt_vec_info structs for all the stmts in LOOP.  */
 900
 901 static loop_vec_info
 902 new_loop_vec_info (struct loop *loop)
 903 {
 904   loop_vec_info res;
 905   basic_block *bbs;
 906   gimple_stmt_iterator si;
 907   unsigned int i, nbbs;
 908
 909   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
 910   LOOP_VINFO_LOOP (res) = loop;
 911
 912   bbs = get_loop_body (loop);
 913
 914   /* Create/Update stmt_info for all stmts in the loop.  */
 915   for (i = 0; i < loop->num_nodes; i++)
 916     {
 917       basic_block bb = bbs[i];
 918
 919       /* BBs in a nested inner-loop will have been already processed (because
 920          we will have called vect_analyze_loop_form for any nested inner-loop).
 921          Therefore, for stmts in an inner-loop we just want to update the
 922          STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
 923          loop_info of the outer-loop we are currently considering to vectorize
 924          (instead of the loop_info of the inner-loop).
 925          For stmts in other BBs we need to create a stmt_info from scratch.  */
 926       if (bb->loop_father != loop)
 927         {
 928           /* Inner-loop bb.  */
 929           gcc_assert (loop->inner && bb->loop_father == loop->inner);
 930           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 931             {
 932               gimple phi = gsi_stmt (si);
 933               stmt_vec_info stmt_info = vinfo_for_stmt (phi);
 934               loop_vec_info inner_loop_vinfo =
 935                 STMT_VINFO_LOOP_VINFO (stmt_info);
 936               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 937               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 938             }
 939           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 940            {
 941               gimple stmt = gsi_stmt (si);
 942               stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 943               loop_vec_info inner_loop_vinfo =
 944                  STMT_VINFO_LOOP_VINFO (stmt_info);
 945               gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
 946               STMT_VINFO_LOOP_VINFO (stmt_info) = res;
 947            }
 948         }
 949       else
 950         {
 951           /* bb in current nest.  */
 952           for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
 953             {
 954               gimple phi = gsi_stmt (si);
 955               gimple_set_uid (phi, 0);
 956               set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
 957             }
 958
 959           for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 960             {
 961               gimple stmt = gsi_stmt (si);
 962               gimple_set_uid (stmt, 0);
 963               set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
 964             }
 965         }
 966     }
 967
 968   /* CHECKME: We want to visit all BBs before their successors (except for
 969      latch blocks, for which this assertion wouldn't hold).  In the simple
 970      case of the loop forms we allow, a dfs order of the BBs would the same
 971      as reversed postorder traversal, so we are safe.  */
 972
 973    free (bbs);
 974    bbs = XCNEWVEC (basic_block, loop->num_nodes);
 975    nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
 976                               bbs, loop->num_nodes, loop);
 977    gcc_assert (nbbs == loop->num_nodes);
 978
 979   LOOP_VINFO_BBS (res) = bbs;
 980   LOOP_VINFO_NITERSM1 (res) = NULL;
 981   LOOP_VINFO_NITERS (res) = NULL;
 982   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
 983   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
 984   LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
 985   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 986   LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
 987   LOOP_VINFO_VECT_FACTOR (res) = 0;
 988   LOOP_VINFO_LOOP_NEST (res).create (3);
 989   LOOP_VINFO_DATAREFS (res).create (10);
 990   LOOP_VINFO_DDRS (res).create (10 * 10);
 991   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
 992   LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
 993              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
 994   LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
 995              PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
 996   LOOP_VINFO_GROUPED_STORES (res).create (10);
 997   LOOP_VINFO_REDUCTIONS (res).create (10);
 998   LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
 999   LOOP_VINFO_SLP_INSTANCES (res).create (10);
1000   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
1001   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
1002   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
1003   LOOP_VINFO_PEELING_FOR_NITER (res) = false;
1004   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
1005
1006   return res;
1007 }
1008
1009
1010 /* Function destroy_loop_vec_info.
1011
1012    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1013    stmts in the loop.  */
1014
1015 void
1016 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
1017 {
1018   struct loop *loop;
1019   basic_block *bbs;
1020   int nbbs;
1021   gimple_stmt_iterator si;
1022   int j;
1023   vec<slp_instance> slp_instances;
1024   slp_instance instance;
1025   bool swapped;
1026
1027   if (!loop_vinfo)
1028     return;
1029
1030   loop = LOOP_VINFO_LOOP (loop_vinfo);
1031
1032   bbs = LOOP_VINFO_BBS (loop_vinfo);
1033   nbbs = clean_stmts ? loop->num_nodes : 0;
1034   swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
1035
1036   for (j = 0; j < nbbs; j++)
1037     {
1038       basic_block bb = bbs[j];
1039       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1040         free_stmt_vec_info (gsi_stmt (si));
1041
1042       for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1043         {
1044           gimple stmt = gsi_stmt (si);
1045
1046           /* We may have broken canonical form by moving a constant
1047              into RHS1 of a commutative op.  Fix such occurrences.  */
1048           if (swapped && is_gimple_assign (stmt))
1049             {
1050               enum tree_code code = gimple_assign_rhs_code (stmt);
1051
1052               if ((code == PLUS_EXPR
1053                    || code == POINTER_PLUS_EXPR
1054                    || code == MULT_EXPR)
1055                   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
1056                 swap_ssa_operands (stmt,
1057                                    gimple_assign_rhs1_ptr (stmt),
1058                                    gimple_assign_rhs2_ptr (stmt));
1059             }
1060
1061           /* Free stmt_vec_info.  */
1062           free_stmt_vec_info (stmt);
1063           gsi_next (&si);
1064         }
1065     }
1066
1067   free (LOOP_VINFO_BBS (loop_vinfo));
1068   vect_destroy_datarefs (loop_vinfo, NULL);
1069   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1070   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
1071   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
1072   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
1073   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074   FOR_EACH_VEC_ELT (slp_instances, j, instance)
1075     vect_free_slp_instance (instance);
1076
1077   LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
1078   LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
1079   LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
1080   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();
1081
1082   delete LOOP_VINFO_PEELING_HTAB (loop_vinfo);
1083   LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL;
1084
1085   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1086   loop_vinfo->scalar_cost_vec.release ();
1087
1088   free (loop_vinfo);
1089   loop->aux = NULL;
1090 }
1091
1092
1093 /* Calculate the cost of one scalar iteration of the loop.  */
1094 static void
1095 vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1096 {
1097   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1098   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1099   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
1100   int innerloop_iters, i;
1101
1102   /* Count statements in scalar loop.  Using this as scalar cost for a single
1103      iteration for now.
1104
1105      TODO: Add outer loop support.
1106
1107      TODO: Consider assigning different costs to different scalar
1108      statements.  */
1109
1110   /* FORNOW.  */
1111   innerloop_iters = 1;
1112   if (loop->inner)
1113     innerloop_iters = 50; /* FIXME */
1114
1115   for (i = 0; i < nbbs; i++)
1116     {
1117       gimple_stmt_iterator si;
1118       basic_block bb = bbs[i];
1119
1120       if (bb->loop_father == loop->inner)
1121         factor = innerloop_iters;
1122       else
1123         factor = 1;
1124
1125       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1126         {
1127           gimple stmt = gsi_stmt (si);
1128           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1129
1130           if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1131             continue;
1132
1133           /* Skip stmts that are not vectorized inside the loop.  */
1134           if (stmt_info
1135               && !STMT_VINFO_RELEVANT_P (stmt_info)
1136               && (!STMT_VINFO_LIVE_P (stmt_info)
1137                   || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1138               && !STMT_VINFO_IN_PATTERN_P (stmt_info))
1139             continue;
1140
1141           vect_cost_for_stmt kind;
1142           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
1143             {
1144               if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1145                kind = scalar_load;
1146              else
1147                kind = scalar_store;
1148             }
1149           else
1150             kind = scalar_stmt;
1151
1152           scalar_single_iter_cost
1153             += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1154                                  factor, kind, NULL, 0, vect_prologue);
1155         }
1156     }
1157   LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
1158     = scalar_single_iter_cost;
1159 }
1160
1161
1162 /* Function vect_analyze_loop_1.
1163
1164    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1165    for it. The different analyses will record information in the
1166    loop_vec_info struct.  This is a subset of the analyses applied in
1167    vect_analyze_loop, to be applied on an inner-loop nested in the loop
1168    that is now considered for (outer-loop) vectorization.  */
1169
1170 static loop_vec_info
1171 vect_analyze_loop_1 (struct loop *loop)
1172 {
1173   loop_vec_info loop_vinfo;
1174
1175   if (dump_enabled_p ())
1176     dump_printf_loc (MSG_NOTE, vect_location,
1177                      "===== analyze_loop_nest_1 =====\n");
1178
1179   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
1180
1181   loop_vinfo = vect_analyze_loop_form (loop);
1182   if (!loop_vinfo)
1183     {
1184       if (dump_enabled_p ())
1185         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1186                          "bad inner-loop form.\n");
1187       return NULL;
1188     }
1189
1190   return loop_vinfo;
1191 }
1192
1193
1194 /* Function vect_analyze_loop_form.
1195
1196    Verify that certain CFG restrictions hold, including:
1197    - the loop has a pre-header
1198    - the loop has a single entry and exit
1199    - the loop exit condition is simple enough, and the number of iterations
1200      can be analyzed (a countable loop).  */
1201
1202 loop_vec_info
1203 vect_analyze_loop_form (struct loop *loop)
1204 {
1205   loop_vec_info loop_vinfo;
1206   gcond *loop_cond;
1207   tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
1208   loop_vec_info inner_loop_vinfo = NULL;
1209
1210   if (dump_enabled_p ())
1211     dump_printf_loc (MSG_NOTE, vect_location,
1212                      "=== vect_analyze_loop_form ===\n");
1213
1214   /* Different restrictions apply when we are considering an inner-most loop,
1215      vs. an outer (nested) loop.
1216      (FORNOW. May want to relax some of these restrictions in the future).  */
1217
1218   if (!loop->inner)
1219     {
1220       /* Inner-most loop.  We currently require that the number of BBs is
1221          exactly 2 (the header and latch).  Vectorizable inner-most loops
1222          look like this:
1223
1224                         (pre-header)
1225                            |
1226                           header <--------+
1227                            | |            |
1228                            | +--> latch --+
1229                            |
1230                         (exit-bb)  */
1231
1232       if (loop->num_nodes != 2)
1233         {
1234           if (dump_enabled_p ())
1235             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1236                              "not vectorized: control flow in loop.\n");
1237           return NULL;
1238         }
1239
1240       if (empty_block_p (loop->header))
1241         {
1242           if (dump_enabled_p ())
1243             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1244                              "not vectorized: empty loop.\n");
1245           return NULL;
1246         }
1247     }
1248   else
1249     {
1250       struct loop *innerloop = loop->inner;
1251       edge entryedge;
1252
1253       /* Nested loop. We currently require that the loop is doubly-nested,
1254          contains a single inner loop, and the number of BBs is exactly 5.
1255          Vectorizable outer-loops look like this:
1256
1257                         (pre-header)
1258                            |
1259                           header <---+
1260                            |         |
1261                           inner-loop |
1262                            |         |
1263                           tail ------+
1264                            |
1265                         (exit-bb)
1266
1267          The inner-loop has the properties expected of inner-most loops
1268          as described above.  */
1269
1270       if ((loop->inner)->inner || (loop->inner)->next)
1271         {
1272           if (dump_enabled_p ())
1273             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1274                              "not vectorized: multiple nested loops.\n");
1275           return NULL;
1276         }
1277
1278       /* Analyze the inner-loop.  */
1279       inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
1280       if (!inner_loop_vinfo)
1281         {
1282           if (dump_enabled_p ())
1283             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1284                              "not vectorized: Bad inner loop.\n");
1285           return NULL;
1286         }
1287
1288       if (!expr_invariant_in_loop_p (loop,
1289                                         LOOP_VINFO_NITERS (inner_loop_vinfo)))
1290         {
1291           if (dump_enabled_p ())
1292             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1293                              "not vectorized: inner-loop count not"
1294                              " invariant.\n");
1295           destroy_loop_vec_info (inner_loop_vinfo, true);
1296           return NULL;
1297         }
1298
1299       if (loop->num_nodes != 5)
1300         {
1301           if (dump_enabled_p ())
1302             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1303                              "not vectorized: control flow in loop.\n");
1304           destroy_loop_vec_info (inner_loop_vinfo, true);
1305           return NULL;
1306         }
1307
1308       gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1309       entryedge = EDGE_PRED (innerloop->header, 0);
1310       if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1311         entryedge = EDGE_PRED (innerloop->header, 1);
1312
1313       if (entryedge->src != loop->header
1314           || !single_exit (innerloop)
1315           || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src)
1316         {
1317           if (dump_enabled_p ())
1318             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1319                              "not vectorized: unsupported outerloop form.\n");
1320           destroy_loop_vec_info (inner_loop_vinfo, true);
1321           return NULL;
1322         }
1323
1324       if (dump_enabled_p ())
1325         dump_printf_loc (MSG_NOTE, vect_location,
1326                          "Considering outer-loop vectorization.\n");
1327     }
1328
1329   if (!single_exit (loop)
1330       || EDGE_COUNT (loop->header->preds) != 2)
1331     {
1332       if (dump_enabled_p ())
1333         {
1334           if (!single_exit (loop))
1335             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1336                              "not vectorized: multiple exits.\n");
1337           else if (EDGE_COUNT (loop->header->preds) != 2)
1338             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1339                              "not vectorized: too many incoming edges.\n");
1340         }
1341       if (inner_loop_vinfo)
1342         destroy_loop_vec_info (inner_loop_vinfo, true);
1343       return NULL;
1344     }
1345
1346   /* We assume that the loop exit condition is at the end of the loop. i.e,
1347      that the loop is represented as a do-while (with a proper if-guard
1348      before the loop if needed), where the loop header contains all the
1349      executable statements, and the latch is empty.  */
1350   if (!empty_block_p (loop->latch)
1351       || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1352     {
1353       if (dump_enabled_p ())
1354         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1355                          "not vectorized: latch block not empty.\n");
1356       if (inner_loop_vinfo)
1357         destroy_loop_vec_info (inner_loop_vinfo, true);
1358       return NULL;
1359     }
1360
1361   /* Make sure there exists a single-predecessor exit bb:  */
1362   if (!single_pred_p (single_exit (loop)->dest))
1363     {
1364       edge e = single_exit (loop);
1365       if (!(e->flags & EDGE_ABNORMAL))
1366         {
1367           split_loop_exit_edge (e);
1368           if (dump_enabled_p ())
1369             dump_printf (MSG_NOTE, "split exit edge.\n");
1370         }
1371       else
1372         {
1373           if (dump_enabled_p ())
1374             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1375                              "not vectorized: abnormal loop exit edge.\n");
1376           if (inner_loop_vinfo)
1377             destroy_loop_vec_info (inner_loop_vinfo, true);
1378           return NULL;
1379         }
1380     }
1381
1382   loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
1383                                     &number_of_iterationsm1);
1384   if (!loop_cond)
1385     {
1386       if (dump_enabled_p ())
1387         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1388                          "not vectorized: complicated exit condition.\n");
1389       if (inner_loop_vinfo)
1390         destroy_loop_vec_info (inner_loop_vinfo, true);
1391       return NULL;
1392     }
1393
1394   if (!number_of_iterations
1395       || chrec_contains_undetermined (number_of_iterations))
1396     {
1397       if (dump_enabled_p ())
1398         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1399                          "not vectorized: number of iterations cannot be "
1400                          "computed.\n");
1401       if (inner_loop_vinfo)
1402         destroy_loop_vec_info (inner_loop_vinfo, true);
1403       return NULL;
1404     }
1405
1406   if (integer_zerop (number_of_iterations))
1407     {
1408       if (dump_enabled_p ())
1409         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1410                          "not vectorized: number of iterations = 0.\n");
1411       if (inner_loop_vinfo)
1412         destroy_loop_vec_info (inner_loop_vinfo, true);
1413       return NULL;
1414     }
1415
1416   loop_vinfo = new_loop_vec_info (loop);
1417   LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1418   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1419   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1420
1421   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1422     {
1423       if (dump_enabled_p ())
1424         {
1425           dump_printf_loc (MSG_NOTE, vect_location,
1426                            "Symbolic number of iterations is ");
1427           dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1428           dump_printf (MSG_NOTE, "\n");
1429         }
1430     }
1431
1432   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1433
1434   /* CHECKME: May want to keep it around it in the future.  */
1435   if (inner_loop_vinfo)
1436     destroy_loop_vec_info (inner_loop_vinfo, false);
1437
1438   gcc_assert (!loop->aux);
1439   loop->aux = loop_vinfo;
1440   return loop_vinfo;
1441 }
1442
1443 /* Scan the loop stmts and dependent on whether there are any (non-)SLP
1444    statements update the vectorization factor.  */
1445
1446 static void
1447 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1448 {
1449   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1450   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1451   int nbbs = loop->num_nodes;
1452   unsigned int vectorization_factor;
1453   int i;
1454
1455   if (dump_enabled_p ())
1456     dump_printf_loc (MSG_NOTE, vect_location,
1457                      "=== vect_update_vf_for_slp ===\n");
1458
1459   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1460   gcc_assert (vectorization_factor != 0);
1461
1462   /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1463      vectorization factor of the loop is the unrolling factor required by
1464      the SLP instances.  If that unrolling factor is 1, we say, that we
1465      perform pure SLP on loop - cross iteration parallelism is not
1466      exploited.  */
1467   bool only_slp_in_loop = true;
1468   for (i = 0; i < nbbs; i++)
1469     {
1470       basic_block bb = bbs[i];
1471       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1472            gsi_next (&si))
1473         {
1474           gimple stmt = gsi_stmt (si);
1475           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1476           if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1477               && STMT_VINFO_RELATED_STMT (stmt_info))
1478             {
1479               stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1480               stmt_info = vinfo_for_stmt (stmt);
1481             }
1482           if ((STMT_VINFO_RELEVANT_P (stmt_info)
1483                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1484               && !PURE_SLP_STMT (stmt_info))
1485             /* STMT needs both SLP and loop-based vectorization.  */
1486             only_slp_in_loop = false;
1487         }
1488     }
1489
1490   if (only_slp_in_loop)
1491     vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1492   else
1493     vectorization_factor
1494       = least_common_multiple (vectorization_factor,
1495                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1496
1497   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1498   if (dump_enabled_p ())
1499     dump_printf_loc (MSG_NOTE, vect_location,
1500                      "Updating vectorization factor to %d\n",
1501                      vectorization_factor);
1502 }
1503
1504 /* Function vect_analyze_loop_operations.
1505
1506    Scan the loop stmts and make sure they are all vectorizable.  */
1507
1508 static bool
1509 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1510 {
1511   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1512   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1513   int nbbs = loop->num_nodes;
1514   unsigned int vectorization_factor;
1515   int i;
1516   stmt_vec_info stmt_info;
1517   bool need_to_vectorize = false;
1518   int min_profitable_iters;
1519   int min_scalar_loop_bound;
1520   unsigned int th;
1521   bool ok;
1522   HOST_WIDE_INT max_niter;
1523   HOST_WIDE_INT estimated_niter;
1524   int min_profitable_estimate;
1525
1526   if (dump_enabled_p ())
1527     dump_printf_loc (MSG_NOTE, vect_location,
1528                      "=== vect_analyze_loop_operations ===\n");
1529
1530   for (i = 0; i < nbbs; i++)
1531     {
1532       basic_block bb = bbs[i];
1533
1534       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1535            gsi_next (&si))
1536         {
1537           gphi *phi = si.phi ();
1538           ok = true;
1539
1540           stmt_info = vinfo_for_stmt (phi);
1541           if (dump_enabled_p ())
1542             {
1543               dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1544               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1545               dump_printf (MSG_NOTE, "\n");
1546             }
1547
1548           /* Inner-loop loop-closed exit phi in outer-loop vectorization
1549              (i.e., a phi in the tail of the outer-loop).  */
1550           if (! is_loop_header_bb_p (bb))
1551             {
1552               /* FORNOW: we currently don't support the case that these phis
1553                  are not used in the outerloop (unless it is double reduction,
1554                  i.e., this phi is vect_reduction_def), cause this case
1555                  requires to actually do something here.  */
1556               if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1557                    || STMT_VINFO_LIVE_P (stmt_info))
1558                   && STMT_VINFO_DEF_TYPE (stmt_info)
1559                      != vect_double_reduction_def)
1560                 {
1561                   if (dump_enabled_p ())
1562                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1563                                      "Unsupported loop-closed phi in "
1564                                      "outer-loop.\n");
1565                   return false;
1566                 }
1567
1568               /* If PHI is used in the outer loop, we check that its operand
1569                  is defined in the inner loop.  */
1570               if (STMT_VINFO_RELEVANT_P (stmt_info))
1571                 {
1572                   tree phi_op;
1573                   gimple op_def_stmt;
1574
1575                   if (gimple_phi_num_args (phi) != 1)
1576                     return false;
1577
1578                   phi_op = PHI_ARG_DEF (phi, 0);
1579                   if (TREE_CODE (phi_op) != SSA_NAME)
1580                     return false;
1581
1582                   op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1583                   if (gimple_nop_p (op_def_stmt)
1584                       || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1585                       || !vinfo_for_stmt (op_def_stmt))
1586                     return false;
1587
1588                   if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1589                         != vect_used_in_outer
1590                       && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1591                            != vect_used_in_outer_by_reduction)
1592                     return false;
1593                 }
1594
1595               continue;
1596             }
1597
1598           gcc_assert (stmt_info);
1599
1600           if (STMT_VINFO_LIVE_P (stmt_info))
1601             {
1602               /* FORNOW: not yet supported.  */
1603               if (dump_enabled_p ())
1604                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1605                                  "not vectorized: value used after loop.\n");
1606               return false;
1607             }
1608
1609           if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1610               && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1611             {
1612               /* A scalar-dependence cycle that we don't support.  */
1613               if (dump_enabled_p ())
1614                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1615                                  "not vectorized: scalar dependence cycle.\n");
1616               return false;
1617             }
1618
1619           if (STMT_VINFO_RELEVANT_P (stmt_info))
1620             {
1621               need_to_vectorize = true;
1622               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1623                 ok = vectorizable_induction (phi, NULL, NULL);
1624             }
1625
1626           if (!ok)
1627             {
1628               if (dump_enabled_p ())
1629                 {
1630                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1631                                    "not vectorized: relevant phi not "
1632                                    "supported: ");
1633                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1634                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1635                 }
1636               return false;
1637             }
1638         }
1639
1640       for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1641            gsi_next (&si))
1642         {
1643           gimple stmt = gsi_stmt (si);
1644           if (!gimple_clobber_p (stmt)
1645               && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1646             return false;
1647         }
1648     } /* bbs */
1649
1650   /* All operations in the loop are either irrelevant (deal with loop
1651      control, or dead), or only used outside the loop and can be moved
1652      out of the loop (e.g. invariants, inductions).  The loop can be
1653      optimized away by scalar optimizations.  We're better off not
1654      touching this loop.  */
1655   if (!need_to_vectorize)
1656     {
1657       if (dump_enabled_p ())
1658         dump_printf_loc (MSG_NOTE, vect_location,
1659                          "All the computation can be taken out of the loop.\n");
1660       if (dump_enabled_p ())
1661         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1662                          "not vectorized: redundant loop. no profit to "
1663                          "vectorize.\n");
1664       return false;
1665     }
1666
1667   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1668   gcc_assert (vectorization_factor != 0);
1669
1670   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1671     dump_printf_loc (MSG_NOTE, vect_location,
1672                      "vectorization_factor = %d, niters = "
1673                      HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
1674                      LOOP_VINFO_INT_NITERS (loop_vinfo));
1675
1676   if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1677        && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1678       || ((max_niter = max_stmt_executions_int (loop)) != -1
1679           && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1680     {
1681       if (dump_enabled_p ())
1682         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1683                          "not vectorized: iteration count too small.\n");
1684       if (dump_enabled_p ())
1685         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1686                          "not vectorized: iteration count smaller than "
1687                          "vectorization factor.\n");
1688       return false;
1689     }
1690
1691   /* Analyze cost.  Decide if worth while to vectorize.  */
1692
1693   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1694                                       &min_profitable_estimate);
1695   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1696
1697   if (min_profitable_iters < 0)
1698     {
1699       if (dump_enabled_p ())
1700         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1701                          "not vectorized: vectorization not profitable.\n");
1702       if (dump_enabled_p ())
1703         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1704                          "not vectorized: vector version will never be "
1705                          "profitable.\n");
1706       return false;
1707     }
1708
1709   min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1710                             * vectorization_factor) - 1);
1711
1712
1713   /* Use the cost model only if it is more conservative than user specified
1714      threshold.  */
1715
1716   th = (unsigned) min_scalar_loop_bound;
1717   if (min_profitable_iters
1718       && (!min_scalar_loop_bound
1719           || min_profitable_iters > min_scalar_loop_bound))
1720     th = (unsigned) min_profitable_iters;
1721
1722   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1723
1724   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1725       && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1726     {
1727       if (dump_enabled_p ())
1728         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1729                          "not vectorized: vectorization not profitable.\n");
1730       if (dump_enabled_p ())
1731         dump_printf_loc (MSG_NOTE, vect_location,
1732                          "not vectorized: iteration count smaller than user "
1733                          "specified loop bound parameter or minimum profitable "
1734                          "iterations (whichever is more conservative).\n");
1735       return false;
1736     }
1737
1738   if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1739       && ((unsigned HOST_WIDE_INT) estimated_niter
1740           <= MAX (th, (unsigned)min_profitable_estimate)))
1741     {
1742       if (dump_enabled_p ())
1743         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1744                          "not vectorized: estimated iteration count too "
1745                          "small.\n");
1746       if (dump_enabled_p ())
1747         dump_printf_loc (MSG_NOTE, vect_location,
1748                          "not vectorized: estimated iteration count smaller "
1749                          "than specified loop bound parameter or minimum "
1750                          "profitable iterations (whichever is more "
1751                          "conservative).\n");
1752       return false;
1753     }
1754
1755   return true;
1756 }
1757
1758
1759 /* Function vect_analyze_loop_2.
1760
1761    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1762    for it.  The different analyses will record information in the
1763    loop_vec_info struct.  */
1764 static bool
1765 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1766 {
1767   bool ok;
1768   int max_vf = MAX_VECTORIZATION_FACTOR;
1769   int min_vf = 2;
1770   unsigned int th;
1771   unsigned int n_stmts = 0;
1772
1773   /* Find all data references in the loop (which correspond to vdefs/vuses)
1774      and analyze their evolution in the loop.  Also adjust the minimal
1775      vectorization factor according to the loads and stores.
1776
1777      FORNOW: Handle only simple, array references, which
1778      alignment can be forced, and aligned pointer-references.  */
1779
1780   ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf, &n_stmts);
1781   if (!ok)
1782     {
1783       if (dump_enabled_p ())
1784         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1785                          "bad data references.\n");
1786       return false;
1787     }
1788
1789   /* Classify all cross-iteration scalar data-flow cycles.
1790      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
1791
1792   vect_analyze_scalar_cycles (loop_vinfo);
1793
1794   vect_pattern_recog (loop_vinfo, NULL);
1795
1796   vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
1797
1798   /* Analyze the access patterns of the data-refs in the loop (consecutive,
1799      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
1800
1801   ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1802   if (!ok)
1803     {
1804       if (dump_enabled_p ())
1805         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806                          "bad data access.\n");
1807       return false;
1808     }
1809
1810   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
1811
1812   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1813   if (!ok)
1814     {
1815       if (dump_enabled_p ())
1816         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1817                          "unexpected pattern.\n");
1818       return false;
1819     }
1820
1821   /* Analyze data dependences between the data-refs in the loop
1822      and adjust the maximum vectorization factor according to
1823      the dependences.
1824      FORNOW: fail at the first data dependence that we encounter.  */
1825
1826   ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1827   if (!ok
1828       || max_vf < min_vf)
1829     {
1830       if (dump_enabled_p ())
1831             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832                              "bad data dependence.\n");
1833       return false;
1834     }
1835
1836   ok = vect_determine_vectorization_factor (loop_vinfo);
1837   if (!ok)
1838     {
1839       if (dump_enabled_p ())
1840         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1841                          "can't determine vectorization factor.\n");
1842       return false;
1843     }
1844   if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1845     {
1846       if (dump_enabled_p ())
1847         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848                          "bad data dependence.\n");
1849       return false;
1850     }
1851
1852   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
1853   ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
1854   if (!ok)
1855     return false;
1856
1857   /* If there are any SLP instances mark them as pure_slp.  */
1858   bool slp = vect_make_slp_decision (loop_vinfo);
1859   if (slp)
1860     {
1861       /* Find stmts that need to be both vectorized and SLPed.  */
1862       vect_detect_hybrid_slp (loop_vinfo);
1863
1864       /* Update the vectorization factor based on the SLP decision.  */
1865       vect_update_vf_for_slp (loop_vinfo);
1866     }
1867
1868   /* Analyze the alignment of the data-refs in the loop.
1869      Fail if a data reference is found that cannot be vectorized.  */
1870
1871   ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1872   if (!ok)
1873     {
1874       if (dump_enabled_p ())
1875         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1876                          "bad data alignment.\n");
1877       return false;
1878     }
1879
1880   /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1881      It is important to call pruning after vect_analyze_data_ref_accesses,
1882      since we use grouping information gathered by interleaving analysis.  */
1883   ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1884   if (!ok)
1885     {
1886       if (dump_enabled_p ())
1887         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888                          "number of versioning for alias "
1889                          "run-time tests exceeds %d "
1890                          "(--param vect-max-version-for-alias-checks)\n",
1891                          PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1892       return false;
1893     }
1894
1895   /* Compute the scalar iteration cost.  */
1896   vect_get_single_scalar_iteration_cost (loop_vinfo);
1897
1898   /* This pass will decide on using loop versioning and/or loop peeling in
1899      order to enhance the alignment of data references in the loop.  */
1900
1901   ok = vect_enhance_data_refs_alignment (loop_vinfo);
1902   if (!ok)
1903     {
1904       if (dump_enabled_p ())
1905         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1906                          "bad data alignment.\n");
1907       return false;
1908     }
1909
1910   if (slp)
1911     {
1912       /* Analyze operations in the SLP instances.  Note this may
1913          remove unsupported SLP instances which makes the above
1914          SLP kind detection invalid.  */
1915       unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
1916       vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1917                                    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
1918       if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
1919         return false;
1920     }
1921
1922   /* Scan all the remaining operations in the loop that are not subject
1923      to SLP and make sure they are vectorizable.  */
1924   ok = vect_analyze_loop_operations (loop_vinfo);
1925   if (!ok)
1926     {
1927       if (dump_enabled_p ())
1928         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1929                          "bad operation or unsupported loop bound.\n");
1930       return false;
1931     }
1932
1933   /* Decide whether we need to create an epilogue loop to handle
1934      remaining scalar iterations.  */
1935   th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
1936         / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1937        * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1938
1939   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1940       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1941     {
1942       if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
1943                    - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1944           < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1945         LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1946     }
1947   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1948            || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
1949                < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1950                /* In case of versioning, check if the maximum number of
1951                   iterations is greater than th.  If they are identical,
1952                   the epilogue is unnecessary.  */
1953                && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1954                     && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1955                    || (unsigned HOST_WIDE_INT)max_stmt_executions_int
1956                         (LOOP_VINFO_LOOP (loop_vinfo)) > th)))
1957     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1958
1959   /* If an epilogue loop is required make sure we can create one.  */
1960   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1961       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
1962     {
1963       if (dump_enabled_p ())
1964         dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
1965       if (!vect_can_advance_ivs_p (loop_vinfo)
1966           || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
1967                                            single_exit (LOOP_VINFO_LOOP
1968                                                          (loop_vinfo))))
1969         {
1970           if (dump_enabled_p ())
1971             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972                              "not vectorized: can't create required "
1973                              "epilog loop\n");
1974           return false;
1975         }
1976     }
1977
1978   return true;
1979 }
1980
1981 /* Function vect_analyze_loop.
1982
1983    Apply a set of analyses on LOOP, and create a loop_vec_info struct
1984    for it.  The different analyses will record information in the
1985    loop_vec_info struct.  */
1986 loop_vec_info
1987 vect_analyze_loop (struct loop *loop)
1988 {
1989   loop_vec_info loop_vinfo;
1990   unsigned int vector_sizes;
1991
1992   /* Autodetect first vector size we try.  */
1993   current_vector_size = 0;
1994   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1995
1996   if (dump_enabled_p ())
1997     dump_printf_loc (MSG_NOTE, vect_location,
1998                      "===== analyze_loop_nest =====\n");
1999
2000   if (loop_outer (loop)
2001       && loop_vec_info_for_loop (loop_outer (loop))
2002       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2003     {
2004       if (dump_enabled_p ())
2005         dump_printf_loc (MSG_NOTE, vect_location,
2006                          "outer-loop already vectorized.\n");
2007       return NULL;
2008     }
2009
2010   while (1)
2011     {
2012       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
2013       loop_vinfo = vect_analyze_loop_form (loop);
2014       if (!loop_vinfo)
2015         {
2016           if (dump_enabled_p ())
2017             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2018                              "bad loop form.\n");
2019           return NULL;
2020         }
2021
2022       if (vect_analyze_loop_2 (loop_vinfo))
2023         {
2024           LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2025
2026           return loop_vinfo;
2027         }
2028
2029       destroy_loop_vec_info (loop_vinfo, true);
2030
2031       vector_sizes &= ~current_vector_size;
2032       if (vector_sizes == 0
2033           || current_vector_size == 0)
2034         return NULL;
2035
2036       /* Try the next biggest vector size.  */
2037       current_vector_size = 1 << floor_log2 (vector_sizes);
2038       if (dump_enabled_p ())
2039         dump_printf_loc (MSG_NOTE, vect_location,
2040                          "***** Re-trying analysis with "
2041                          "vector size %d\n", current_vector_size);
2042     }
2043 }
2044
2045
2046 /* Function reduction_code_for_scalar_code
2047
2048    Input:
2049    CODE - tree_code of a reduction operations.
2050
2051    Output:
2052    REDUC_CODE - the corresponding tree-code to be used to reduce the
2053       vector of partial results into a single scalar result, or ERROR_MARK
2054       if the operation is a supported reduction operation, but does not have
2055       such a tree-code.
2056
2057    Return FALSE if CODE currently cannot be vectorized as reduction.  */
2058
2059 static bool
2060 reduction_code_for_scalar_code (enum tree_code code,
2061                                 enum tree_code *reduc_code)
2062 {
2063   switch (code)
2064     {
2065       case MAX_EXPR:
2066         *reduc_code = REDUC_MAX_EXPR;
2067         return true;
2068
2069       case MIN_EXPR:
2070         *reduc_code = REDUC_MIN_EXPR;
2071         return true;
2072
2073       case PLUS_EXPR:
2074         *reduc_code = REDUC_PLUS_EXPR;
2075         return true;
2076
2077       case MULT_EXPR:
2078       case MINUS_EXPR:
2079       case BIT_IOR_EXPR:
2080       case BIT_XOR_EXPR:
2081       case BIT_AND_EXPR:
2082         *reduc_code = ERROR_MARK;
2083         return true;
2084
2085       default:
2086        return false;
2087     }
2088 }
2089
2090
2091 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
2092    STMT is printed with a message MSG. */
2093
2094 static void
2095 report_vect_op (int msg_type, gimple stmt, const char *msg)
2096 {
2097   dump_printf_loc (msg_type, vect_location, "%s", msg);
2098   dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2099   dump_printf (msg_type, "\n");
2100 }
2101
2102
2103 /* Detect SLP reduction of the form:
2104
2105    #a1 = phi <a5, a0>
2106    a2 = operation (a1)
2107    a3 = operation (a2)
2108    a4 = operation (a3)
2109    a5 = operation (a4)
2110
2111    #a = phi <a5>
2112
2113    PHI is the reduction phi node (#a1 = phi <a5, a0> above)
2114    FIRST_STMT is the first reduction stmt in the chain
2115    (a2 = operation (a1)).
2116
2117    Return TRUE if a reduction chain was detected.  */
2118
2119 static bool
2120 vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
2121 {
2122   struct loop *loop = (gimple_bb (phi))->loop_father;
2123   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2124   enum tree_code code;
2125   gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
2126   stmt_vec_info use_stmt_info, current_stmt_info;
2127   tree lhs;
2128   imm_use_iterator imm_iter;
2129   use_operand_p use_p;
2130   int nloop_uses, size = 0, n_out_of_loop_uses;
2131   bool found = false;
2132
2133   if (loop != vect_loop)
2134     return false;
2135
2136   lhs = PHI_RESULT (phi);
2137   code = gimple_assign_rhs_code (first_stmt);
2138   while (1)
2139     {
2140       nloop_uses = 0;
2141       n_out_of_loop_uses = 0;
2142       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2143         {
2144           gimple use_stmt = USE_STMT (use_p);
2145           if (is_gimple_debug (use_stmt))
2146             continue;
2147
2148           /* Check if we got back to the reduction phi.  */
2149           if (use_stmt == phi)
2150             {
2151               loop_use_stmt = use_stmt;
2152               found = true;
2153               break;
2154             }
2155
2156           if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2157             {
2158               loop_use_stmt = use_stmt;
2159               nloop_uses++;
2160             }
2161            else
2162              n_out_of_loop_uses++;
2163
2164            /* There are can be either a single use in the loop or two uses in
2165               phi nodes.  */
2166            if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2167              return false;
2168         }
2169
2170       if (found)
2171         break;
2172
2173       /* We reached a statement with no loop uses.  */
2174       if (nloop_uses == 0)
2175         return false;
2176
2177       /* This is a loop exit phi, and we haven't reached the reduction phi.  */
2178       if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
2179         return false;
2180
2181       if (!is_gimple_assign (loop_use_stmt)
2182           || code != gimple_assign_rhs_code (loop_use_stmt)
2183           || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2184         return false;
2185
2186       /* Insert USE_STMT into reduction chain.  */
2187       use_stmt_info = vinfo_for_stmt (loop_use_stmt);
2188       if (current_stmt)
2189         {
2190           current_stmt_info = vinfo_for_stmt (current_stmt);
2191           GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
2192           GROUP_FIRST_ELEMENT (use_stmt_info)
2193             = GROUP_FIRST_ELEMENT (current_stmt_info);
2194         }
2195       else
2196         GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
2197
2198       lhs = gimple_assign_lhs (loop_use_stmt);
2199       current_stmt = loop_use_stmt;
2200       size++;
2201    }
2202
2203   if (!found || loop_use_stmt != phi || size < 2)
2204     return false;
2205
2206   /* Swap the operands, if needed, to make the reduction operand be the second
2207      operand.  */
2208   lhs = PHI_RESULT (phi);
2209   next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2210   while (next_stmt)
2211     {
2212       if (gimple_assign_rhs2 (next_stmt) == lhs)
2213         {
2214           tree op = gimple_assign_rhs1 (next_stmt);
2215           gimple def_stmt = NULL;
2216
2217           if (TREE_CODE (op) == SSA_NAME)
2218             def_stmt = SSA_NAME_DEF_STMT (op);
2219
2220           /* Check that the other def is either defined in the loop
2221              ("vect_internal_def"), or it's an induction (defined by a
2222              loop-header phi-node).  */
2223           if (def_stmt
2224               && gimple_bb (def_stmt)
2225               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2226               && (is_gimple_assign (def_stmt)
2227                   || is_gimple_call (def_stmt)
2228                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2229                            == vect_induction_def
2230                   || (gimple_code (def_stmt) == GIMPLE_PHI
2231                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2232                                   == vect_internal_def
2233                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2234             {
2235               lhs = gimple_assign_lhs (next_stmt);
2236               next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2237               continue;
2238             }
2239
2240           return false;
2241         }
2242       else
2243         {
2244           tree op = gimple_assign_rhs2 (next_stmt);
2245           gimple def_stmt = NULL;
2246
2247           if (TREE_CODE (op) == SSA_NAME)
2248             def_stmt = SSA_NAME_DEF_STMT (op);
2249
2250           /* Check that the other def is either defined in the loop
2251             ("vect_internal_def"), or it's an induction (defined by a
2252             loop-header phi-node).  */
2253           if (def_stmt
2254               && gimple_bb (def_stmt)
2255               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2256               && (is_gimple_assign (def_stmt)
2257                   || is_gimple_call (def_stmt)
2258                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2259                               == vect_induction_def
2260                   || (gimple_code (def_stmt) == GIMPLE_PHI
2261                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2262                                   == vect_internal_def
2263                       && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2264             {
2265               if (dump_enabled_p ())
2266                 {
2267                   dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2268                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2269                   dump_printf (MSG_NOTE, "\n");
2270                 }
2271
2272               swap_ssa_operands (next_stmt,
2273                                  gimple_assign_rhs1_ptr (next_stmt),
2274                                  gimple_assign_rhs2_ptr (next_stmt));
2275               update_stmt (next_stmt);
2276
2277               if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2278                 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2279             }
2280           else
2281             return false;
2282         }
2283
2284       lhs = gimple_assign_lhs (next_stmt);
2285       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2286     }
2287
2288   /* Save the chain for further analysis in SLP detection.  */
2289   first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2290   LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
2291   GROUP_SIZE (vinfo_for_stmt (first)) = size;
2292
2293   return true;
2294 }
2295
2296
2297 /* Function vect_is_simple_reduction_1
2298
2299    (1) Detect a cross-iteration def-use cycle that represents a simple
2300    reduction computation.  We look for the following pattern:
2301
2302    loop_header:
2303      a1 = phi < a0, a2 >
2304      a3 = ...
2305      a2 = operation (a3, a1)
2306
2307    or
2308
2309    a3 = ...
2310    loop_header:
2311      a1 = phi < a0, a2 >
2312      a2 = operation (a3, a1)
2313
2314    such that:
2315    1. operation is commutative and associative and it is safe to
2316       change the order of the computation (if CHECK_REDUCTION is true)
2317    2. no uses for a2 in the loop (a2 is used out of the loop)
2318    3. no uses of a1 in the loop besides the reduction operation
2319    4. no uses of a1 outside the loop.
2320
2321    Conditions 1,4 are tested here.
2322    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
2323
2324    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2325    nested cycles, if CHECK_REDUCTION is false.
2326
2327    (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2328    reductions:
2329
2330      a1 = phi < a0, a2 >
2331      inner loop (def of a3)
2332      a2 = phi < a3 >
2333
2334    If MODIFY is true it also tries to rework the code in-place to enable
2335    detection of more reduction patterns.  For the time being we rewrite
2336    "res -= RHS" into "res += -RHS" when it seems worthwhile.
2337 */
2338
2339 static gimple
2340 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2341                             bool check_reduction, bool *double_reduc,
2342                             bool modify, bool need_wrapping_integral_overflow)
2343 {
2344   struct loop *loop = (gimple_bb (phi))->loop_father;
2345   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2346   edge latch_e = loop_latch_edge (loop);
2347   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2348   gimple def_stmt, def1 = NULL, def2 = NULL;
2349   enum tree_code orig_code, code;
2350   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2351   tree type;
2352   int nloop_uses;
2353   tree name;
2354   imm_use_iterator imm_iter;
2355   use_operand_p use_p;
2356   bool phi_def;
2357
2358   *double_reduc = false;
2359
2360   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2361      otherwise, we assume outer loop vectorization.  */
2362   gcc_assert ((check_reduction && loop == vect_loop)
2363               || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
2364
2365   name = PHI_RESULT (phi);
2366   /* ???  If there are no uses of the PHI result the inner loop reduction
2367      won't be detected as possibly double-reduction by vectorizable_reduction
2368      because that tries to walk the PHI arg from the preheader edge which
2369      can be constant.  See PR60382.  */
2370   if (has_zero_uses (name))
2371     return NULL;
2372   nloop_uses = 0;
2373   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2374     {
2375       gimple use_stmt = USE_STMT (use_p);
2376       if (is_gimple_debug (use_stmt))
2377         continue;
2378
2379       if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2380         {
2381           if (dump_enabled_p ())
2382             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2383                              "intermediate value used outside loop.\n");
2384
2385           return NULL;
2386         }
2387
2388       nloop_uses++;
2389       if (nloop_uses > 1)
2390         {
2391           if (dump_enabled_p ())
2392             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2393                              "reduction used in loop.\n");
2394           return NULL;
2395         }
2396     }
2397
2398   if (TREE_CODE (loop_arg) != SSA_NAME)
2399     {
2400       if (dump_enabled_p ())
2401         {
2402           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2403                            "reduction: not ssa_name: ");
2404           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2405           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2406         }
2407       return NULL;
2408     }
2409
2410   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2411   if (!def_stmt)
2412     {
2413       if (dump_enabled_p ())
2414         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415                          "reduction: no def_stmt.\n");
2416       return NULL;
2417     }
2418
2419   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
2420     {
2421       if (dump_enabled_p ())
2422         {
2423           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2424           dump_printf (MSG_NOTE, "\n");
2425         }
2426       return NULL;
2427     }
2428
2429   if (is_gimple_assign (def_stmt))
2430     {
2431       name = gimple_assign_lhs (def_stmt);
2432       phi_def = false;
2433     }
2434   else
2435     {
2436       name = PHI_RESULT (def_stmt);
2437       phi_def = true;
2438     }
2439
2440   nloop_uses = 0;
2441   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2442     {
2443       gimple use_stmt = USE_STMT (use_p);
2444       if (is_gimple_debug (use_stmt))
2445         continue;
2446       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2447         nloop_uses++;
2448       if (nloop_uses > 1)
2449         {
2450           if (dump_enabled_p ())
2451             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2452                              "reduction used in loop.\n");
2453           return NULL;
2454         }
2455     }
2456
2457   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2458      defined in the inner loop.  */
2459   if (phi_def)
2460     {
2461       op1 = PHI_ARG_DEF (def_stmt, 0);
2462
2463       if (gimple_phi_num_args (def_stmt) != 1
2464           || TREE_CODE (op1) != SSA_NAME)
2465         {
2466           if (dump_enabled_p ())
2467             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468                              "unsupported phi node definition.\n");
2469
2470           return NULL;
2471         }
2472
2473       def1 = SSA_NAME_DEF_STMT (op1);
2474       if (gimple_bb (def1)
2475           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2476           && loop->inner
2477           && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2478           && is_gimple_assign (def1))
2479         {
2480           if (dump_enabled_p ())
2481             report_vect_op (MSG_NOTE, def_stmt,
2482                             "detected double reduction: ");
2483
2484           *double_reduc = true;
2485           return def_stmt;
2486         }
2487
2488       return NULL;
2489     }
2490
2491   code = orig_code = gimple_assign_rhs_code (def_stmt);
2492
2493   /* We can handle "res -= x[i]", which is non-associative by
2494      simply rewriting this into "res += -x[i]".  Avoid changing
2495      gimple instruction for the first simple tests and only do this
2496      if we're allowed to change code at all.  */
2497   if (code == MINUS_EXPR
2498       && modify
2499       && (op1 = gimple_assign_rhs1 (def_stmt))
2500       && TREE_CODE (op1) == SSA_NAME
2501       && SSA_NAME_DEF_STMT (op1) == phi)
2502     code = PLUS_EXPR;
2503
2504   if (check_reduction
2505       && (!commutative_tree_code (code) || !associative_tree_code (code)))
2506     {
2507       if (dump_enabled_p ())
2508         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2509                         "reduction: not commutative/associative: ");
2510       return NULL;
2511     }
2512
2513   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
2514     {
2515       if (code != COND_EXPR)
2516         {
2517           if (dump_enabled_p ())
2518             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2519                             "reduction: not binary operation: ");
2520
2521           return NULL;
2522         }
2523
2524       op3 = gimple_assign_rhs1 (def_stmt);
2525       if (COMPARISON_CLASS_P (op3))
2526         {
2527           op4 = TREE_OPERAND (op3, 1);
2528           op3 = TREE_OPERAND (op3, 0);
2529         }
2530
2531       op1 = gimple_assign_rhs2 (def_stmt);
2532       op2 = gimple_assign_rhs3 (def_stmt);
2533
2534       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2535         {
2536           if (dump_enabled_p ())
2537             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2538                             "reduction: uses not ssa_names: ");
2539
2540           return NULL;
2541         }
2542     }
2543   else
2544     {
2545       op1 = gimple_assign_rhs1 (def_stmt);
2546       op2 = gimple_assign_rhs2 (def_stmt);
2547
2548       if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2549         {
2550           if (dump_enabled_p ())
2551             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2552                             "reduction: uses not ssa_names: ");
2553
2554           return NULL;
2555         }
2556    }
2557
2558   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
2559   if ((TREE_CODE (op1) == SSA_NAME
2560        && !types_compatible_p (type,TREE_TYPE (op1)))
2561       || (TREE_CODE (op2) == SSA_NAME
2562           && !types_compatible_p (type, TREE_TYPE (op2)))
2563       || (op3 && TREE_CODE (op3) == SSA_NAME
2564           && !types_compatible_p (type, TREE_TYPE (op3)))
2565       || (op4 && TREE_CODE (op4) == SSA_NAME
2566           && !types_compatible_p (type, TREE_TYPE (op4))))
2567     {
2568       if (dump_enabled_p ())
2569         {
2570           dump_printf_loc (MSG_NOTE, vect_location,
2571                            "reduction: multiple types: operation type: ");
2572           dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2573           dump_printf (MSG_NOTE, ", operands types: ");
2574           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2575                              TREE_TYPE (op1));
2576           dump_printf (MSG_NOTE, ",");
2577           dump_generic_expr (MSG_NOTE, TDF_SLIM,
2578                              TREE_TYPE (op2));
2579           if (op3)
2580             {
2581               dump_printf (MSG_NOTE, ",");
2582               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2583                                  TREE_TYPE (op3));
2584             }
2585
2586           if (op4)
2587             {
2588               dump_printf (MSG_NOTE, ",");
2589               dump_generic_expr (MSG_NOTE, TDF_SLIM,
2590                                  TREE_TYPE (op4));
2591             }
2592           dump_printf (MSG_NOTE, "\n");
2593         }
2594
2595       return NULL;
2596     }
2597
2598   /* Check that it's ok to change the order of the computation.
2599      Generally, when vectorizing a reduction we change the order of the
2600      computation.  This may change the behavior of the program in some
2601      cases, so we need to check that this is ok.  One exception is when
2602      vectorizing an outer-loop: the inner-loop is executed sequentially,
2603      and therefore vectorizing reductions in the inner-loop during
2604      outer-loop vectorization is safe.  */
2605
2606   /* CHECKME: check for !flag_finite_math_only too?  */
2607   if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
2608       && check_reduction)
2609     {
2610       /* Changing the order of operations changes the semantics.  */
2611       if (dump_enabled_p ())
2612         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2613                         "reduction: unsafe fp math optimization: ");
2614       return NULL;
2615     }
2616   else if (INTEGRAL_TYPE_P (type) && check_reduction)
2617     {
2618       if (!operation_no_trapping_overflow (type, code))
2619         {
2620           /* Changing the order of operations changes the semantics.  */
2621           if (dump_enabled_p ())
2622             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2623                             "reduction: unsafe int math optimization"
2624                             " (overflow traps): ");
2625           return NULL;
2626         }
2627       if (need_wrapping_integral_overflow
2628           && !TYPE_OVERFLOW_WRAPS (type)
2629           && operation_can_overflow (code))
2630         {
2631           /* Changing the order of operations changes the semantics.  */
2632           if (dump_enabled_p ())
2633             report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2634                             "reduction: unsafe int math optimization"
2635                             " (overflow doesn't wrap): ");
2636           return NULL;
2637         }
2638     }
2639   else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
2640     {
2641       /* Changing the order of operations changes the semantics.  */
2642       if (dump_enabled_p ())
2643         report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2644                         "reduction: unsafe fixed-point math optimization: ");
2645       return NULL;
2646     }
2647
2648   /* If we detected "res -= x[i]" earlier, rewrite it into
2649      "res += -x[i]" now.  If this turns out to be useless reassoc
2650      will clean it up again.  */
2651   if (orig_code == MINUS_EXPR)
2652     {
2653       tree rhs = gimple_assign_rhs2 (def_stmt);
2654       tree negrhs = make_ssa_name (TREE_TYPE (rhs));
2655       gimple negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
2656       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2657       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2658                                                           loop_info, NULL));
2659       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2660       gimple_assign_set_rhs2 (def_stmt, negrhs);
2661       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2662       update_stmt (def_stmt);
2663     }
2664
2665   /* Reduction is safe. We're dealing with one of the following:
2666      1) integer arithmetic and no trapv
2667      2) floating point arithmetic, and special flags permit this optimization
2668      3) nested cycle (i.e., outer loop vectorization).  */
2669   if (TREE_CODE (op1) == SSA_NAME)
2670     def1 = SSA_NAME_DEF_STMT (op1);
2671
2672   if (TREE_CODE (op2) == SSA_NAME)
2673     def2 = SSA_NAME_DEF_STMT (op2);
2674
2675   if (code != COND_EXPR
2676       && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
2677     {
2678       if (dump_enabled_p ())
2679         report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
2680       return NULL;
2681     }
2682
2683   /* Check that one def is the reduction def, defined by PHI,
2684      the other def is either defined in the loop ("vect_internal_def"),
2685      or it's an induction (defined by a loop-header phi-node).  */
2686
2687   if (def2 && def2 == phi
2688       && (code == COND_EXPR
2689           || !def1 || gimple_nop_p (def1)
2690           || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
2691           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2692               && (is_gimple_assign (def1)
2693                   || is_gimple_call (def1)
2694                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2695                       == vect_induction_def
2696                   || (gimple_code (def1) == GIMPLE_PHI
2697                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2698                           == vect_internal_def
2699                       && !is_loop_header_bb_p (gimple_bb (def1)))))))
2700     {
2701       if (dump_enabled_p ())
2702         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2703       return def_stmt;
2704     }
2705
2706   if (def1 && def1 == phi
2707       && (code == COND_EXPR
2708           || !def2 || gimple_nop_p (def2)
2709           || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
2710           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2711               && (is_gimple_assign (def2)
2712                   || is_gimple_call (def2)
2713                   || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2714                       == vect_induction_def
2715                   || (gimple_code (def2) == GIMPLE_PHI
2716                       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2717                           == vect_internal_def
2718                       && !is_loop_header_bb_p (gimple_bb (def2)))))))
2719     {
2720       if (check_reduction)
2721         {
2722           /* Swap operands (just for simplicity - so that the rest of the code
2723              can assume that the reduction variable is always the last (second)
2724              argument).  */
2725           if (dump_enabled_p ())
2726             report_vect_op (MSG_NOTE, def_stmt,
2727                             "detected reduction: need to swap operands: ");
2728
2729           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2730                              gimple_assign_rhs2_ptr (def_stmt));
2731
2732           if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2733             LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
2734         }
2735       else
2736         {
2737           if (dump_enabled_p ())
2738             report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
2739         }
2740
2741       return def_stmt;
2742     }
2743
2744   /* Try to find SLP reduction chain.  */
2745   if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
2746     {
2747       if (dump_enabled_p ())
2748         report_vect_op (MSG_NOTE, def_stmt,
2749                         "reduction: detected reduction chain: ");
2750
2751       return def_stmt;
2752     }
2753
2754   if (dump_enabled_p ())
2755     report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2756                     "reduction: unknown pattern: ");
2757
2758   return NULL;
2759 }
2760
2761 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2762    in-place.  Arguments as there.  */
2763
2764 static gimple
2765 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2766                           bool check_reduction, bool *double_reduc,
2767                           bool need_wrapping_integral_overflow)
2768 {
2769   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2770                                      double_reduc, false,
2771                                      need_wrapping_integral_overflow);
2772 }
2773
2774 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2775    in-place if it enables detection of more reductions.  Arguments
2776    as there.  */
2777
2778 gimple
2779 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2780                              bool check_reduction, bool *double_reduc,
2781                              bool need_wrapping_integral_overflow)
2782 {
2783   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2784                                      double_reduc, true,
2785                                      need_wrapping_integral_overflow);
2786 }
2787
2788 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
2789 int
2790 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2791                              int *peel_iters_epilogue,
2792                              stmt_vector_for_cost *scalar_cost_vec,
2793                              stmt_vector_for_cost *prologue_cost_vec,
2794                              stmt_vector_for_cost *epilogue_cost_vec)
2795 {
2796   int retval = 0;
2797   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2798
2799   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2800     {
2801       *peel_iters_epilogue = vf/2;
2802       if (dump_enabled_p ())
2803         dump_printf_loc (MSG_NOTE, vect_location,
2804                          "cost model: epilogue peel iters set to vf/2 "
2805                          "because loop iterations are unknown .\n");
2806
2807       /* If peeled iterations are known but number of scalar loop
2808          iterations are unknown, count a taken branch per peeled loop.  */
2809       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2810                                  NULL, 0, vect_prologue);
2811       retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
2812                                  NULL, 0, vect_epilogue);
2813     }
2814   else
2815     {
2816       int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2817       peel_iters_prologue = niters < peel_iters_prologue ?
2818                             niters : peel_iters_prologue;
2819       *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2820       /* If we need to peel for gaps, but no peeling is required, we have to
2821          peel VF iterations.  */
2822       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2823         *peel_iters_epilogue = vf;
2824     }
2825
2826   stmt_info_for_cost *si;
2827   int j;
2828   if (peel_iters_prologue)
2829     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2830       retval += record_stmt_cost (prologue_cost_vec,
2831                                   si->count * peel_iters_prologue,
2832                                   si->kind, NULL, si->misalign,
2833                                   vect_prologue);
2834   if (*peel_iters_epilogue)
2835     FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
2836       retval += record_stmt_cost (epilogue_cost_vec,
2837                                   si->count * *peel_iters_epilogue,
2838                                   si->kind, NULL, si->misalign,
2839                                   vect_epilogue);
2840
2841   return retval;
2842 }
2843
2844 /* Function vect_estimate_min_profitable_iters
2845
2846    Return the number of iterations required for the vector version of the
2847    loop to be profitable relative to the cost of the scalar version of the
2848    loop.  */
2849
2850 static void
2851 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2852                                     int *ret_min_profitable_niters,
2853                                     int *ret_min_profitable_estimate)
2854 {
2855   int min_profitable_iters;
2856   int min_profitable_estimate;
2857   int peel_iters_prologue;
2858   int peel_iters_epilogue;
2859   unsigned vec_inside_cost = 0;
2860   int vec_outside_cost = 0;
2861   unsigned vec_prologue_cost = 0;
2862   unsigned vec_epilogue_cost = 0;
2863   int scalar_single_iter_cost = 0;
2864   int scalar_outside_cost = 0;
2865   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2866   int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2867   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2868
2869   /* Cost model disabled.  */
2870   if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2871     {
2872       dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
2873       *ret_min_profitable_niters = 0;
2874       *ret_min_profitable_estimate = 0;
2875       return;
2876     }
2877
2878   /* Requires loop versioning tests to handle misalignment.  */
2879   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2880     {
2881       /*  FIXME: Make cost depend on complexity of individual check.  */
2882       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
2883       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2884                             vect_prologue);
2885       dump_printf (MSG_NOTE,
2886                    "cost model: Adding cost of checks for loop "
2887                    "versioning to treat misalignment.\n");
2888     }
2889
2890   /* Requires loop versioning with alias checks.  */
2891   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2892     {
2893       /*  FIXME: Make cost depend on complexity of individual check.  */
2894       unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
2895       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2896                             vect_prologue);
2897       dump_printf (MSG_NOTE,
2898                    "cost model: Adding cost of checks for loop "
2899                    "versioning aliasing.\n");
2900     }
2901
2902   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2903       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2904     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2905                           vect_prologue);
2906
2907   /* Count statements in scalar loop.  Using this as scalar cost for a single
2908      iteration for now.
2909
2910      TODO: Add outer loop support.
2911
2912      TODO: Consider assigning different costs to different scalar
2913      statements.  */
2914
2915   scalar_single_iter_cost
2916     = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
2917
2918   /* Add additional cost for the peeled instructions in prologue and epilogue
2919      loop.
2920
2921      FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2922      at compile-time - we assume it's vf/2 (the worst would be vf-1).
2923
2924      TODO: Build an expression that represents peel_iters for prologue and
2925      epilogue to be used in a run-time test.  */
2926
2927   if (npeel  < 0)
2928     {
2929       peel_iters_prologue = vf/2;
2930       dump_printf (MSG_NOTE, "cost model: "
2931                    "prologue peel iters set to vf/2.\n");
2932
2933       /* If peeling for alignment is unknown, loop bound of main loop becomes
2934          unknown.  */
2935       peel_iters_epilogue = vf/2;
2936       dump_printf (MSG_NOTE, "cost model: "
2937                    "epilogue peel iters set to vf/2 because "
2938                    "peeling for alignment is unknown.\n");
2939
2940       /* If peeled iterations are unknown, count a taken branch and a not taken
2941          branch per peeled loop. Even if scalar loop iterations are known,
2942          vector iterations are not known since peeled prologue iterations are
2943          not known. Hence guards remain the same.  */
2944       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2945                             NULL, 0, vect_prologue);
2946       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2947                             NULL, 0, vect_prologue);
2948       (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
2949                             NULL, 0, vect_epilogue);
2950       (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
2951                             NULL, 0, vect_epilogue);
2952       stmt_info_for_cost *si;
2953       int j;
2954       FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
2955         {
2956           struct _stmt_vec_info *stmt_info
2957             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2958           (void) add_stmt_cost (target_cost_data,
2959                                 si->count * peel_iters_prologue,
2960                                 si->kind, stmt_info, si->misalign,
2961                                 vect_prologue);
2962           (void) add_stmt_cost (target_cost_data,
2963                                 si->count * peel_iters_epilogue,
2964                                 si->kind, stmt_info, si->misalign,
2965                                 vect_epilogue);
2966         }
2967     }
2968   else
2969     {
2970       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2971       stmt_info_for_cost *si;
2972       int j;
2973       void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2974
2975       prologue_cost_vec.create (2);
2976       epilogue_cost_vec.create (2);
2977       peel_iters_prologue = npeel;
2978
2979       (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2980                                           &peel_iters_epilogue,
2981                                           &LOOP_VINFO_SCALAR_ITERATION_COST
2982                                             (loop_vinfo),
2983                                           &prologue_cost_vec,
2984                                           &epilogue_cost_vec);
2985
2986       FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
2987         {
2988           struct _stmt_vec_info *stmt_info
2989             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2990           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2991                                 si->misalign, vect_prologue);
2992         }
2993
2994       FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
2995         {
2996           struct _stmt_vec_info *stmt_info
2997             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2998           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2999                                 si->misalign, vect_epilogue);
3000         }
3001
3002       prologue_cost_vec.release ();
3003       epilogue_cost_vec.release ();
3004     }
3005
3006   /* FORNOW: The scalar outside cost is incremented in one of the
3007      following ways:
3008
3009      1. The vectorizer checks for alignment and aliasing and generates
3010      a condition that allows dynamic vectorization.  A cost model
3011      check is ANDED with the versioning condition.  Hence scalar code
3012      path now has the added cost of the versioning check.
3013
3014        if (cost > th & versioning_check)
3015          jmp to vector code
3016
3017      Hence run-time scalar is incremented by not-taken branch cost.
3018
3019      2. The vectorizer then checks if a prologue is required.  If the
3020      cost model check was not done before during versioning, it has to
3021      be done before the prologue check.
3022
3023        if (cost <= th)
3024          prologue = scalar_iters
3025        if (prologue == 0)
3026          jmp to vector code
3027        else
3028          execute prologue
3029        if (prologue == num_iters)
3030          go to exit
3031
3032      Hence the run-time scalar cost is incremented by a taken branch,
3033      plus a not-taken branch, plus a taken branch cost.
3034
3035      3. The vectorizer then checks if an epilogue is required.  If the
3036      cost model check was not done before during prologue check, it
3037      has to be done with the epilogue check.
3038
3039        if (prologue == 0)
3040          jmp to vector code
3041        else
3042          execute prologue
3043        if (prologue == num_iters)
3044          go to exit
3045        vector code:
3046          if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
3047            jmp to epilogue
3048
3049      Hence the run-time scalar cost should be incremented by 2 taken
3050      branches.
3051
3052      TODO: The back end may reorder the BBS's differently and reverse
3053      conditions/branch directions.  Change the estimates below to
3054      something more reasonable.  */
3055
3056   /* If the number of iterations is known and we do not do versioning, we can
3057      decide whether to vectorize at compile time.  Hence the scalar version
3058      do not carry cost model guard costs.  */
3059   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3060       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3061       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3062     {
3063       /* Cost model check occurs at versioning.  */
3064       if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
3065           || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
3066         scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
3067       else
3068         {
3069           /* Cost model check occurs at prologue generation.  */
3070           if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
3071             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
3072               + vect_get_stmt_cost (cond_branch_not_taken);
3073           /* Cost model check occurs at epilogue generation.  */
3074           else
3075             scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
3076         }
3077     }
3078
3079   /* Complete the target-specific cost calculations.  */
3080   finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
3081                &vec_inside_cost, &vec_epilogue_cost);
3082
3083   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
3084
3085   if (dump_enabled_p ())
3086     {
3087       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3088       dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
3089                    vec_inside_cost);
3090       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
3091                    vec_prologue_cost);
3092       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
3093                    vec_epilogue_cost);
3094       dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
3095                    scalar_single_iter_cost);
3096       dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
3097                    scalar_outside_cost);
3098       dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
3099                    vec_outside_cost);
3100       dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
3101                    peel_iters_prologue);
3102       dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
3103                    peel_iters_epilogue);
3104     }
3105
3106   /* Calculate number of iterations required to make the vector version
3107      profitable, relative to the loop bodies only.  The following condition
3108      must hold true:
3109      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
3110      where
3111      SIC = scalar iteration cost, VIC = vector iteration cost,
3112      VOC = vector outside cost, VF = vectorization factor,
3113      PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
3114      SOC = scalar outside cost for run time cost model check.  */
3115
3116   if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
3117     {
3118       if (vec_outside_cost <= 0)
3119         min_profitable_iters = 1;
3120       else
3121         {
3122           min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
3123                                   - vec_inside_cost * peel_iters_prologue
3124                                   - vec_inside_cost * peel_iters_epilogue)
3125                                  / ((scalar_single_iter_cost * vf)
3126                                     - vec_inside_cost);
3127
3128           if ((scalar_single_iter_cost * vf * min_profitable_iters)
3129               <= (((int) vec_inside_cost * min_profitable_iters)
3130                   + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
3131             min_profitable_iters++;
3132         }
3133     }
3134   /* vector version will never be profitable.  */
3135   else
3136     {
3137       if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3138         warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3139                     "did not happen for a simd loop");
3140
3141       if (dump_enabled_p ())
3142         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3143                          "cost model: the vector iteration cost = %d "
3144                          "divided by the scalar iteration cost = %d "
3145                          "is greater or equal to the vectorization factor = %d"
3146                          ".\n",
3147                          vec_inside_cost, scalar_single_iter_cost, vf);
3148       *ret_min_profitable_niters = -1;
3149       *ret_min_profitable_estimate = -1;
3150       return;
3151     }
3152
3153   dump_printf (MSG_NOTE,
3154                "  Calculated minimum iters for profitability: %d\n",
3155                min_profitable_iters);
3156
3157   min_profitable_iters =
3158         min_profitable_iters < vf ? vf : min_profitable_iters;
3159
3160   /* Because the condition we create is:
3161      if (niters <= min_profitable_iters)
3162        then skip the vectorized loop.  */
3163   min_profitable_iters--;
3164
3165   if (dump_enabled_p ())
3166     dump_printf_loc (MSG_NOTE, vect_location,
3167                      "  Runtime profitability threshold = %d\n",
3168                      min_profitable_iters);
3169
3170   *ret_min_profitable_niters = min_profitable_iters;
3171
3172   /* Calculate number of iterations required to make the vector version
3173      profitable, relative to the loop bodies only.
3174
3175      Non-vectorized variant is SIC * niters and it must win over vector
3176      variant on the expected loop trip count.  The following condition must hold true:
3177      SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC  */
3178
3179   if (vec_outside_cost <= 0)
3180     min_profitable_estimate = 1;
3181   else
3182     {
3183       min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3184                                  - vec_inside_cost * peel_iters_prologue
3185                                  - vec_inside_cost * peel_iters_epilogue)
3186                                  / ((scalar_single_iter_cost * vf)
3187                                    - vec_inside_cost);
3188     }
3189   min_profitable_estimate --;
3190   min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3191   if (dump_enabled_p ())
3192     dump_printf_loc (MSG_NOTE, vect_location,
3193                      "  Static estimate profitability threshold = %d\n",
3194                       min_profitable_iters);
3195
3196   *ret_min_profitable_estimate = min_profitable_estimate;
3197 }
3198
3199 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3200    vector elements (not bits) for a vector of mode MODE.  */
3201 static void
3202 calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3203                               unsigned char *sel)
3204 {
3205   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3206
3207   for (i = 0; i < nelt; i++)
3208     sel[i] = (i + offset) & (2*nelt - 1);
3209 }
3210
3211 /* Checks whether the target supports whole-vector shifts for vectors of mode
3212    MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
3213    it supports vec_perm_const with masks for all necessary shift amounts.  */
3214 static bool
3215 have_whole_vector_shift (enum machine_mode mode)
3216 {
3217   if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3218     return true;
3219
3220   if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3221     return false;
3222
3223   unsigned int i, nelt = GET_MODE_NUNITS (mode);
3224   unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3225
3226   for (i = nelt/2; i >= 1; i/=2)
3227     {
3228       calc_vec_perm_mask_for_shift (mode, i, sel);
3229       if (!can_vec_perm_p (mode, false, sel))
3230         return false;
3231     }
3232   return true;
3233 }
3234
3235 /* Return the reduction operand (with index REDUC_INDEX) of STMT.  */
3236
3237 static tree
3238 get_reduction_op (gimple stmt, int reduc_index)
3239 {
3240   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3241     {
3242     case GIMPLE_SINGLE_RHS:
3243       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3244                   == ternary_op);
3245       return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3246     case GIMPLE_UNARY_RHS:
3247       return gimple_assign_rhs1 (stmt);
3248     case GIMPLE_BINARY_RHS:
3249       return (reduc_index
3250               ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt));
3251     case GIMPLE_TERNARY_RHS:
3252       return gimple_op (stmt, reduc_index + 1);
3253     default:
3254       gcc_unreachable ();
3255     }
3256 }
3257
3258 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
3259    functions. Design better to avoid maintenance issues.  */
3260
3261 /* Function vect_model_reduction_cost.
3262
3263    Models cost for a reduction operation, including the vector ops
3264    generated within the strip-mine loop, the initial definition before
3265    the loop, and the epilogue code that must be generated.  */
3266
3267 static bool
3268 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
3269                            int ncopies, int reduc_index)
3270 {
3271   int prologue_cost = 0, epilogue_cost = 0;
3272   enum tree_code code;
3273   optab optab;
3274   tree vectype;
3275   gimple stmt, orig_stmt;
3276   tree reduction_op;
3277   machine_mode mode;
3278   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3279   struct loop *loop = NULL;
3280   void *target_cost_data;
3281
3282   if (loop_vinfo)
3283     {
3284       loop = LOOP_VINFO_LOOP (loop_vinfo);
3285       target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3286     }
3287   else
3288     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3289
3290   /* Cost of reduction op inside loop.  */
3291   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3292                                         stmt_info, 0, vect_body);
3293   stmt = STMT_VINFO_STMT (stmt_info);
3294
3295   reduction_op = get_reduction_op (stmt, reduc_index);
3296
3297   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3298   if (!vectype)
3299     {
3300       if (dump_enabled_p ())
3301         {
3302           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3303                            "unsupported data-type ");
3304           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3305                              TREE_TYPE (reduction_op));
3306           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3307         }
3308       return false;
3309    }
3310
3311   mode = TYPE_MODE (vectype);
3312   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3313
3314   if (!orig_stmt)
3315     orig_stmt = STMT_VINFO_STMT (stmt_info);
3316
3317   code = gimple_assign_rhs_code (orig_stmt);
3318
3319   /* Add in cost for initial definition.  */
3320   prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
3321                                   stmt_info, 0, vect_prologue);
3322
3323   /* Determine cost of epilogue code.
3324
3325      We have a reduction operator that will reduce the vector in one statement.
3326      Also requires scalar extract.  */
3327
3328   if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
3329     {
3330       if (reduc_code != ERROR_MARK)
3331         {
3332           epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
3333                                           stmt_info, 0, vect_epilogue);
3334           epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
3335                                           stmt_info, 0, vect_epilogue);
3336         }
3337       else
3338         {
3339           int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3340           tree bitsize =
3341             TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
3342           int element_bitsize = tree_to_uhwi (bitsize);
3343           int nelements = vec_size_in_bits / element_bitsize;
3344
3345           optab = optab_for_tree_code (code, vectype, optab_default);
3346
3347           /* We have a whole vector shift available.  */
3348           if (VECTOR_MODE_P (mode)
3349               && optab_handler (optab, mode) != CODE_FOR_nothing
3350               && have_whole_vector_shift (mode))
3351             {
3352               /* Final reduction via vector shifts and the reduction operator.
3353                  Also requires scalar extract.  */
3354               epilogue_cost += add_stmt_cost (target_cost_data,
3355                                               exact_log2 (nelements) * 2,
3356                                               vector_stmt, stmt_info, 0,
3357                                               vect_epilogue);
3358               epilogue_cost += add_stmt_cost (target_cost_data, 1,
3359                                               vec_to_scalar, stmt_info, 0,
3360                                               vect_epilogue);
3361             }
3362           else
3363             /* Use extracts and reduction op for final reduction.  For N
3364                elements, we have N extracts and N-1 reduction ops.  */
3365             epilogue_cost += add_stmt_cost (target_cost_data,
3366                                             nelements + nelements - 1,
3367                                             vector_stmt, stmt_info, 0,
3368                                             vect_epilogue);
3369         }
3370     }
3371
3372   if (dump_enabled_p ())
3373     dump_printf (MSG_NOTE,
3374                  "vect_model_reduction_cost: inside_cost = %d, "
3375                  "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
3376                  prologue_cost, epilogue_cost);
3377
3378   return true;
3379 }
3380
3381
3382 /* Function vect_model_induction_cost.
3383
3384    Models cost for induction operations.  */
3385
3386 static void
3387 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
3388 {
3389   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3390   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3391   unsigned inside_cost, prologue_cost;
3392
3393   /* loop cost for vec_loop.  */
3394   inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3395                                stmt_info, 0, vect_body);
3396
3397   /* prologue cost for vec_init and vec_step.  */
3398   prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
3399                                  stmt_info, 0, vect_prologue);
3400
3401   if (dump_enabled_p ())
3402     dump_printf_loc (MSG_NOTE, vect_location,
3403                      "vect_model_induction_cost: inside_cost = %d, "
3404                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
3405 }
3406
3407
3408 /* Function get_initial_def_for_induction
3409
3410    Input:
3411    STMT - a stmt that performs an induction operation in the loop.
3412    IV_PHI - the initial value of the induction variable
3413
3414    Output:
3415    Return a vector variable, initialized with the first VF values of
3416    the induction variable.  E.g., for an iv with IV_PHI='X' and
3417    evolution S, for a vector of 4 units, we want to return:
3418    [X, X + S, X + 2*S, X + 3*S].  */
3419
3420 static tree
3421 get_initial_def_for_induction (gimple iv_phi)
3422 {
3423   stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
3424   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3425   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3426   tree vectype;
3427   int nunits;
3428   edge pe = loop_preheader_edge (loop);
3429   struct loop *iv_loop;
3430   basic_block new_bb;
3431   tree new_vec, vec_init, vec_step, t;
3432   tree new_var;
3433   tree new_name;
3434   gimple init_stmt, new_stmt;
3435   gphi *induction_phi;
3436   tree induc_def, vec_def, vec_dest;
3437   tree init_expr, step_expr;
3438   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3439   int i;
3440   int ncopies;
3441   tree expr;
3442   stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
3443   bool nested_in_vect_loop = false;
3444   gimple_seq stmts = NULL;
3445   imm_use_iterator imm_iter;
3446   use_operand_p use_p;
3447   gimple exit_phi;
3448   edge latch_e;
3449   tree loop_arg;
3450   gimple_stmt_iterator si;
3451   basic_block bb = gimple_bb (iv_phi);
3452   tree stepvectype;
3453   tree resvectype;
3454
3455   /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
3456   if (nested_in_vect_loop_p (loop, iv_phi))
3457     {
3458       nested_in_vect_loop = true;
3459       iv_loop = loop->inner;
3460     }
3461   else
3462     iv_loop = loop;
3463   gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
3464
3465   latch_e = loop_latch_edge (iv_loop);
3466   loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
3467
3468   step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
3469   gcc_assert (step_expr != NULL_TREE);
3470
3471   pe = loop_preheader_edge (iv_loop);
3472   init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
3473                                      loop_preheader_edge (iv_loop));
3474
3475   vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
3476   resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
3477   gcc_assert (vectype);
3478   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3479   ncopies = vf / nunits;
3480
3481   gcc_assert (phi_info);
3482   gcc_assert (ncopies >= 1);
3483
3484   /* Convert the step to the desired type.  */
3485   step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3486                                                   step_expr),
3487                                     &stmts, true, NULL_TREE);
3488   if (stmts)
3489     {
3490       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3491       gcc_assert (!new_bb);
3492     }
3493
3494   /* Find the first insertion point in the BB.  */
3495   si = gsi_after_labels (bb);
3496
3497   /* Create the vector that holds the initial_value of the induction.  */
3498   if (nested_in_vect_loop)
3499     {
3500       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
3501          been created during vectorization of previous stmts.  We obtain it
3502          from the STMT_VINFO_VEC_STMT of the defining stmt.  */
3503       vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
3504       /* If the initial value is not of proper type, convert it.  */
3505       if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
3506         {
3507           new_stmt
3508             = gimple_build_assign (vect_get_new_vect_var (vectype,
3509                                                           vect_simple_var,
3510                                                           "vec_iv_"),
3511                                    VIEW_CONVERT_EXPR,
3512                                    build1 (VIEW_CONVERT_EXPR, vectype,
3513                                            vec_init));
3514           vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3515           gimple_assign_set_lhs (new_stmt, vec_init);
3516           new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
3517                                                  new_stmt);
3518           gcc_assert (!new_bb);
3519           set_vinfo_for_stmt (new_stmt,
3520                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3521         }
3522     }
3523   else
3524     {
3525       vec<constructor_elt, va_gc> *v;
3526
3527       /* iv_loop is the loop to be vectorized. Create:
3528          vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
3529       new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
3530                                        vect_scalar_var, "var_");
3531       new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
3532                                                      init_expr),
3533                                        &stmts, false, new_var);
3534       if (stmts)
3535         {
3536           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3537           gcc_assert (!new_bb);
3538         }
3539
3540       vec_alloc (v, nunits);
3541       bool constant_p = is_gimple_min_invariant (new_name);
3542       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3543       for (i = 1; i < nunits; i++)
3544         {
3545           /* Create: new_name_i = new_name + step_expr  */
3546           new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
3547                                   new_name, step_expr);
3548           if (!is_gimple_min_invariant (new_name))
3549             {
3550               init_stmt = gimple_build_assign (new_var, new_name);
3551               new_name = make_ssa_name (new_var, init_stmt);
3552               gimple_assign_set_lhs (init_stmt, new_name);
3553               new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
3554               gcc_assert (!new_bb);
3555               if (dump_enabled_p ())
3556                 {
3557                   dump_printf_loc (MSG_NOTE, vect_location,
3558                                    "created new init_stmt: ");
3559                   dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
3560                   dump_printf (MSG_NOTE, "\n");
3561                 }
3562               constant_p = false;
3563             }
3564           CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
3565         }
3566       /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */
3567       if (constant_p)
3568         new_vec = build_vector_from_ctor (vectype, v);
3569       else
3570         new_vec = build_constructor (vectype, v);
3571       vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
3572     }
3573
3574
3575   /* Create the vector that holds the step of the induction.  */
3576   if (nested_in_vect_loop)
3577     /* iv_loop is nested in the loop to be vectorized. Generate:
3578        vec_step = [S, S, S, S]  */
3579     new_name = step_expr;
3580   else
3581     {
3582       /* iv_loop is the loop to be vectorized. Generate:
3583           vec_step = [VF*S, VF*S, VF*S, VF*S]  */
3584       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3585         {
3586           expr = build_int_cst (integer_type_node, vf);
3587           expr = fold_convert (TREE_TYPE (step_expr), expr);
3588         }
3589       else
3590         expr = build_int_cst (TREE_TYPE (step_expr), vf);
3591       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3592                               expr, step_expr);
3593       if (TREE_CODE (step_expr) == SSA_NAME)
3594         new_name = vect_init_vector (iv_phi, new_name,
3595                                      TREE_TYPE (step_expr), NULL);
3596     }
3597
3598   t = unshare_expr (new_name);
3599   gcc_assert (CONSTANT_CLASS_P (new_name)
3600               || TREE_CODE (new_name) == SSA_NAME);
3601   stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
3602   gcc_assert (stepvectype);
3603   new_vec = build_vector_from_val (stepvectype, t);
3604   vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3605
3606
3607   /* Create the following def-use cycle:
3608      loop prolog:
3609          vec_init = ...
3610          vec_step = ...
3611      loop:
3612          vec_iv = PHI <vec_init, vec_loop>
3613          ...
3614          STMT
3615          ...
3616          vec_loop = vec_iv + vec_step;  */
3617
3618   /* Create the induction-phi that defines the induction-operand.  */
3619   vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
3620   induction_phi = create_phi_node (vec_dest, iv_loop->header);
3621   set_vinfo_for_stmt (induction_phi,
3622                       new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
3623   induc_def = PHI_RESULT (induction_phi);
3624
3625   /* Create the iv update inside the loop  */
3626   new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
3627   vec_def = make_ssa_name (vec_dest, new_stmt);
3628   gimple_assign_set_lhs (new_stmt, vec_def);
3629   gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3630   set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
3631                                                    NULL));
3632
3633   /* Set the arguments of the phi node:  */
3634   add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
3635   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
3636                UNKNOWN_LOCATION);
3637
3638
3639   /* In case that vectorization factor (VF) is bigger than the number
3640      of elements that we can fit in a vectype (nunits), we have to generate
3641      more than one vector stmt - i.e - we need to "unroll" the
3642      vector stmt by a factor VF/nunits.  For more details see documentation
3643      in vectorizable_operation.  */
3644
3645   if (ncopies > 1)
3646     {
3647       stmt_vec_info prev_stmt_vinfo;
3648       /* FORNOW. This restriction should be relaxed.  */
3649       gcc_assert (!nested_in_vect_loop);
3650
3651       /* Create the vector that holds the step of the induction.  */
3652       if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
3653         {
3654           expr = build_int_cst (integer_type_node, nunits);
3655           expr = fold_convert (TREE_TYPE (step_expr), expr);
3656         }
3657       else
3658         expr = build_int_cst (TREE_TYPE (step_expr), nunits);
3659       new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
3660                               expr, step_expr);
3661       if (TREE_CODE (step_expr) == SSA_NAME)
3662         new_name = vect_init_vector (iv_phi, new_name,
3663                                      TREE_TYPE (step_expr), NULL);
3664       t = unshare_expr (new_name);
3665       gcc_assert (CONSTANT_CLASS_P (new_name)
3666                   || TREE_CODE (new_name) == SSA_NAME);
3667       new_vec = build_vector_from_val (stepvectype, t);
3668       vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);
3669
3670       vec_def = induc_def;
3671       prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
3672       for (i = 1; i < ncopies; i++)
3673         {
3674           /* vec_i = vec_prev + vec_step  */
3675           new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
3676                                           vec_def, vec_step);
3677           vec_def = make_ssa_name (vec_dest, new_stmt);
3678           gimple_assign_set_lhs (new_stmt, vec_def);
3679
3680           gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3681           if (!useless_type_conversion_p (resvectype, vectype))
3682             {
3683               new_stmt
3684                 = gimple_build_assign
3685                         (vect_get_new_vect_var (resvectype, vect_simple_var,
3686                                                 "vec_iv_"),
3687                          VIEW_CONVERT_EXPR,
3688                          build1 (VIEW_CONVERT_EXPR, resvectype,
3689                                  gimple_assign_lhs (new_stmt)));
3690               gimple_assign_set_lhs (new_stmt,
3691                                      make_ssa_name
3692                                        (gimple_assign_lhs (new_stmt), new_stmt));
3693               gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3694             }
3695           set_vinfo_for_stmt (new_stmt,
3696                               new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3697           STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
3698           prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
3699         }
3700     }
3701
3702   if (nested_in_vect_loop)
3703     {
3704       /* Find the loop-closed exit-phi of the induction, and record
3705          the final vector of induction results:  */
3706       exit_phi = NULL;
3707       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
3708         {
3709           gimple use_stmt = USE_STMT (use_p);
3710           if (is_gimple_debug (use_stmt))
3711             continue;
3712
3713           if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
3714             {
3715               exit_phi = use_stmt;
3716               break;
3717             }
3718         }
3719       if (exit_phi)
3720         {
3721           stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
3722           /* FORNOW. Currently not supporting the case that an inner-loop induction
3723              is not used in the outer-loop (i.e. only outside the outer-loop).  */
3724           gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
3725                       && !STMT_VINFO_LIVE_P (stmt_vinfo));
3726
3727           STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
3728           if (dump_enabled_p ())
3729             {
3730               dump_printf_loc (MSG_NOTE, vect_location,
3731                                "vector of inductions after inner-loop:");
3732               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
3733               dump_printf (MSG_NOTE, "\n");
3734             }
3735         }
3736     }
3737
3738
3739   if (dump_enabled_p ())
3740     {
3741       dump_printf_loc (MSG_NOTE, vect_location,
3742                        "transform induction: created def-use cycle: ");
3743       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
3744       dump_printf (MSG_NOTE, "\n");
3745       dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
3746                         SSA_NAME_DEF_STMT (vec_def), 0);
3747       dump_printf (MSG_NOTE, "\n");
3748     }
3749
3750   STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
3751   if (!useless_type_conversion_p (resvectype, vectype))
3752     {
3753       new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
3754                                                              vect_simple_var,
3755                                                              "vec_iv_"),
3756                                       VIEW_CONVERT_EXPR,
3757                                       build1 (VIEW_CONVERT_EXPR, resvectype,
3758                                               induc_def));
3759       induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
3760       gimple_assign_set_lhs (new_stmt, induc_def);
3761       si = gsi_after_labels (bb);
3762       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
3763       set_vinfo_for_stmt (new_stmt,
3764                           new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
3765       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
3766         = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
3767     }
3768
3769   return induc_def;
3770 }
3771
3772
3773 /* Function get_initial_def_for_reduction
3774
3775    Input:
3776    STMT - a stmt that performs a reduction operation in the loop.
3777    INIT_VAL - the initial value of the reduction variable
3778
3779    Output:
3780    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3781         of the reduction (used for adjusting the epilog - see below).
3782    Return a vector variable, initialized according to the operation that STMT
3783         performs. This vector will be used as the initial value of the
3784         vector of partial results.
3785
3786    Option1 (adjust in epilog): Initialize the vector as follows:
3787      add/bit or/xor:    [0,0,...,0,0]
3788      mult/bit and:      [1,1,...,1,1]
3789      min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3790    and when necessary (e.g. add/mult case) let the caller know
3791    that it needs to adjust the result by init_val.
3792
3793    Option2: Initialize the vector as follows:
3794      add/bit or/xor:    [init_val,0,0,...,0]
3795      mult/bit and:      [init_val,1,1,...,1]
3796      min/max/cond_expr: [init_val,init_val,...,init_val]
3797    and no adjustments are needed.
3798
3799    For example, for the following code:
3800
3801    s = init_val;
3802    for (i=0;i<n;i++)
3803      s = s + a[i];
3804
3805    STMT is 's = s + a[i]', and the reduction variable is 's'.
3806    For a vector of 4 units, we want to return either [0,0,0,init_val],
3807    or [0,0,0,0] and let the caller know that it needs to adjust
3808    the result at the end by 'init_val'.
3809
3810    FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3811    initialization vector is simpler (same element in all entries), if
3812    ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3813
3814    A cost model should help decide between these two schemes.  */
3815
3816 tree
3817 get_initial_def_for_reduction (gimple stmt, tree init_val,
3818                                tree *adjustment_def)
3819 {
3820   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3821   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3822   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3823   tree scalar_type = TREE_TYPE (init_val);
3824   tree vectype = get_vectype_for_scalar_type (scalar_type);
3825   int nunits;
3826   enum tree_code code = gimple_assign_rhs_code (stmt);
3827   tree def_for_init;
3828   tree init_def;
3829   tree *elts;
3830   int i;
3831   bool nested_in_vect_loop = false;
3832   tree init_value;
3833   REAL_VALUE_TYPE real_init_val = dconst0;
3834   int int_init_val = 0;
3835   gimple def_stmt = NULL;
3836
3837   gcc_assert (vectype);
3838   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3839
3840   gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3841               || SCALAR_FLOAT_TYPE_P (scalar_type));
3842
3843   if (nested_in_vect_loop_p (loop, stmt))
3844     nested_in_vect_loop = true;
3845   else
3846     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3847
3848   /* In case of double reduction we only create a vector variable to be put
3849      in the reduction phi node.  The actual statement creation is done in
3850      vect_create_epilog_for_reduction.  */
3851   if (adjustment_def && nested_in_vect_loop
3852       && TREE_CODE (init_val) == SSA_NAME
3853       && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3854       && gimple_code (def_stmt) == GIMPLE_PHI
3855       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3856       && vinfo_for_stmt (def_stmt)
3857       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3858           == vect_double_reduction_def)
3859     {
3860       *adjustment_def = NULL;
3861       return vect_create_destination_var (init_val, vectype);
3862     }
3863
3864   if (TREE_CONSTANT (init_val))
3865     {
3866       if (SCALAR_FLOAT_TYPE_P (scalar_type))
3867         init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3868       else
3869         init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3870     }
3871   else
3872     init_value = init_val;
3873
3874   switch (code)
3875     {
3876       case WIDEN_SUM_EXPR:
3877       case DOT_PROD_EXPR:
3878       case SAD_EXPR:
3879       case PLUS_EXPR:
3880       case MINUS_EXPR:
3881       case BIT_IOR_EXPR:
3882       case BIT_XOR_EXPR:
3883       case MULT_EXPR:
3884       case BIT_AND_EXPR:
3885         /* ADJUSMENT_DEF is NULL when called from
3886            vect_create_epilog_for_reduction to vectorize double reduction.  */
3887         if (adjustment_def)
3888           {
3889             if (nested_in_vect_loop)
3890               *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3891                                                               NULL);
3892             else
3893               *adjustment_def = init_val;
3894           }
3895
3896         if (code == MULT_EXPR)
3897           {
3898             real_init_val = dconst1;
3899             int_init_val = 1;
3900           }
3901
3902         if (code == BIT_AND_EXPR)
3903           int_init_val = -1;
3904
3905         if (SCALAR_FLOAT_TYPE_P (scalar_type))
3906           def_for_init = build_real (scalar_type, real_init_val);
3907         else
3908           def_for_init = build_int_cst (scalar_type, int_init_val);
3909
3910         /* Create a vector of '0' or '1' except the first element.  */
3911         elts = XALLOCAVEC (tree, nunits);
3912         for (i = nunits - 2; i >= 0; --i)
3913           elts[i + 1] = def_for_init;
3914
3915         /* Option1: the first element is '0' or '1' as well.  */
3916         if (adjustment_def)
3917           {
3918             elts[0] = def_for_init;
3919             init_def = build_vector (vectype, elts);
3920             break;
3921           }
3922
3923         /* Option2: the first element is INIT_VAL.  */
3924         elts[0] = init_val;
3925         if (TREE_CONSTANT (init_val))
3926           init_def = build_vector (vectype, elts);
3927         else
3928           {
3929             vec<constructor_elt, va_gc> *v;
3930             vec_alloc (v, nunits);
3931             CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
3932             for (i = 1; i < nunits; ++i)
3933               CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
3934             init_def = build_constructor (vectype, v);
3935           }
3936
3937         break;
3938
3939       case MIN_EXPR:
3940       case MAX_EXPR:
3941       case COND_EXPR:
3942         if (adjustment_def)
3943           {
3944             *adjustment_def = NULL_TREE;
3945             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3946             break;
3947           }
3948
3949         init_def = build_vector_from_val (vectype, init_value);
3950         break;
3951
3952       default:
3953         gcc_unreachable ();
3954     }
3955
3956   return init_def;
3957 }
3958
3959 /* Function vect_create_epilog_for_reduction
3960
3961    Create code at the loop-epilog to finalize the result of a reduction
3962    computation.
3963
3964    VECT_DEFS is a list of vectors of partial results, i.e., the lhs's of vector
3965      reduction statements.
3966    STMT is the scalar reduction stmt that is being vectorized.
3967    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3968      number of elements that we can fit in a vectype (nunits).  In this case
3969      we have to generate more than one vector stmt, i.e., we need to "unroll"
3970      the vector stmt by a factor VF/nunits.  For more details see documentation
3971      in vectorizable_operation.
3972    REDUC_CODE is the tree-code for the epilog reduction.
3973    REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3974      computation.
3975    REDUC_INDEX is the index of the operand in the right hand side of the
3976      statement that is defined by REDUCTION_PHI.
3977    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3978    SLP_NODE is an SLP node containing a group of reduction statements. The
3979      first one in this group is STMT.
3980
3981    This function:
3982    1. Creates the reduction def-use cycles: sets the arguments for
3983       REDUCTION_PHIS:
3984       The loop-entry argument is the vectorized initial-value of the reduction.
3985       The loop-latch argument is taken from VECT_DEFS - the vector of partial
3986       sums.
3987    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3988       by applying the operation specified by REDUC_CODE if available, or by
3989       other means (whole-vector shifts or a scalar loop).
3990       The function also creates a new phi node at the loop exit to preserve
3991       loop-closed form, as illustrated below.
3992
3993      The flow at the entry to this function:
3994
3995         loop:
3996           vec_def = phi <null, null>            # REDUCTION_PHI
3997           VECT_DEF = vector_stmt                # vectorized form of STMT
3998           s_loop = scalar_stmt                  # (scalar) STMT
3999         loop_exit:
4000           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4001           use <s_out0>
4002           use <s_out0>
4003
4004      The above is transformed by this function into:
4005
4006         loop:
4007           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4008           VECT_DEF = vector_stmt                # vectorized form of STMT
4009           s_loop = scalar_stmt                  # (scalar) STMT
4010         loop_exit:
4011           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4012           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4013           v_out2 = reduce <v_out1>
4014           s_out3 = extract_field <v_out2, 0>
4015           s_out4 = adjust_result <s_out3>
4016           use <s_out4>
4017           use <s_out4>
4018 */
4019
4020 static void
4021 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
4022                                   int ncopies, enum tree_code reduc_code,
4023                                   vec<gimple> reduction_phis,
4024                                   int reduc_index, bool double_reduc,
4025                                   slp_tree slp_node)
4026 {
4027   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4028   stmt_vec_info prev_phi_info;
4029   tree vectype;
4030   machine_mode mode;
4031   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4032   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4033   basic_block exit_bb;
4034   tree scalar_dest;
4035   tree scalar_type;
4036   gimple new_phi = NULL, phi;
4037   gimple_stmt_iterator exit_gsi;
4038   tree vec_dest;
4039   tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4040   gimple epilog_stmt = NULL;
4041   enum tree_code code = gimple_assign_rhs_code (stmt);
4042   gimple exit_phi;
4043   tree bitsize;
4044   tree adjustment_def = NULL;
4045   tree vec_initial_def = NULL;
4046   tree reduction_op, expr, def;
4047   tree orig_name, scalar_result;
4048   imm_use_iterator imm_iter, phi_imm_iter;
4049   use_operand_p use_p, phi_use_p;
4050   gimple use_stmt, orig_stmt, reduction_phi = NULL;
4051   bool nested_in_vect_loop = false;
4052   auto_vec<gimple> new_phis;
4053   auto_vec<gimple> inner_phis;
4054   enum vect_def_type dt = vect_unknown_def_type;
4055   int j, i;
4056   auto_vec<tree> scalar_results;
4057   unsigned int group_size = 1, k, ratio;
4058   auto_vec<tree> vec_initial_defs;
4059   auto_vec<gimple> phis;
4060   bool slp_reduc = false;
4061   tree new_phi_result;
4062   gimple inner_phi = NULL;
4063
4064   if (slp_node)
4065     group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4066
4067   if (nested_in_vect_loop_p (loop, stmt))
4068     {
4069       outer_loop = loop;
4070       loop = loop->inner;
4071       nested_in_vect_loop = true;
4072       gcc_assert (!slp_node);
4073     }
4074
4075   reduction_op = get_reduction_op (stmt, reduc_index);
4076
4077   vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
4078   gcc_assert (vectype);
4079   mode = TYPE_MODE (vectype);
4080
4081   /* 1. Create the reduction def-use cycle:
4082      Set the arguments of REDUCTION_PHIS, i.e., transform
4083
4084         loop:
4085           vec_def = phi <null, null>            # REDUCTION_PHI
4086           VECT_DEF = vector_stmt                # vectorized form of STMT
4087           ...
4088
4089      into:
4090
4091         loop:
4092           vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
4093           VECT_DEF = vector_stmt                # vectorized form of STMT
4094           ...
4095
4096      (in case of SLP, do it for all the phis). */
4097
4098   /* Get the loop-entry arguments.  */
4099   if (slp_node)
4100     vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
4101                        NULL, slp_node, reduc_index);
4102   else
4103     {
4104       vec_initial_defs.create (1);
4105      /* For the case of reduction, vect_get_vec_def_for_operand returns
4106         the scalar def before the loop, that defines the initial value
4107         of the reduction variable.  */
4108       vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
4109                                                       &adjustment_def);
4110       vec_initial_defs.quick_push (vec_initial_def);
4111     }
4112
4113   /* Set phi nodes arguments.  */
4114   FOR_EACH_VEC_ELT (reduction_phis, i, phi)
4115     {
4116       tree vec_init_def, def;
4117       gimple_seq stmts;
4118       vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
4119                                            true, NULL_TREE);
4120       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4121       def = vect_defs[i];
4122       for (j = 0; j < ncopies; j++)
4123         {
4124           /* Set the loop-entry arg of the reduction-phi.  */
4125           add_phi_arg (as_a <gphi *> (phi), vec_init_def,
4126                        loop_preheader_edge (loop), UNKNOWN_LOCATION);
4127
4128           /* Set the loop-latch arg for the reduction-phi.  */
4129           if (j > 0)
4130             def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
4131
4132           add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
4133                        UNKNOWN_LOCATION);
4134
4135           if (dump_enabled_p ())
4136             {
4137               dump_printf_loc (MSG_NOTE, vect_location,
4138                                "transform reduction: created def-use cycle: ");
4139               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4140               dump_printf (MSG_NOTE, "\n");
4141               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4142               dump_printf (MSG_NOTE, "\n");
4143             }
4144
4145           phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4146         }
4147     }
4148
4149   /* 2. Create epilog code.
4150         The reduction epilog code operates across the elements of the vector
4151         of partial results computed by the vectorized loop.
4152         The reduction epilog code consists of:
4153
4154         step 1: compute the scalar result in a vector (v_out2)
4155         step 2: extract the scalar result (s_out3) from the vector (v_out2)
4156         step 3: adjust the scalar result (s_out3) if needed.
4157
4158         Step 1 can be accomplished using one of the following three schemes:
4159           (scheme 1) using reduc_code, if available.
4160           (scheme 2) using whole-vector shifts, if available.
4161           (scheme 3) using a scalar loop. In this case steps 1+2 above are
4162                      combined.
4163
4164           The overall epilog code looks like this:
4165
4166           s_out0 = phi <s_loop>         # original EXIT_PHI
4167           v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
4168           v_out2 = reduce <v_out1>              # step 1
4169           s_out3 = extract_field <v_out2, 0>    # step 2
4170           s_out4 = adjust_result <s_out3>       # step 3
4171
4172           (step 3 is optional, and steps 1 and 2 may be combined).
4173           Lastly, the uses of s_out0 are replaced by s_out4.  */
4174
4175
4176   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
4177          v_out1 = phi <VECT_DEF>
4178          Store them in NEW_PHIS.  */
4179
4180   exit_bb = single_exit (loop)->dest;
4181   prev_phi_info = NULL;
4182   new_phis.create (vect_defs.length ());
4183   FOR_EACH_VEC_ELT (vect_defs, i, def)
4184     {
4185       for (j = 0; j < ncopies; j++)
4186         {
4187           tree new_def = copy_ssa_name (def);
4188           phi = create_phi_node (new_def, exit_bb);
4189           set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
4190           if (j == 0)
4191             new_phis.quick_push (phi);
4192           else
4193             {
4194               def = vect_get_vec_def_for_stmt_copy (dt, def);
4195               STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
4196             }
4197
4198           SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4199           prev_phi_info = vinfo_for_stmt (phi);
4200         }
4201     }
4202
4203   /* The epilogue is created for the outer-loop, i.e., for the loop being
4204      vectorized.  Create exit phis for the outer loop.  */
4205   if (double_reduc)
4206     {
4207       loop = outer_loop;
4208       exit_bb = single_exit (loop)->dest;
4209       inner_phis.create (vect_defs.length ());
4210       FOR_EACH_VEC_ELT (new_phis, i, phi)
4211         {
4212           tree new_result = copy_ssa_name (PHI_RESULT (phi));
4213           gphi *outer_phi = create_phi_node (new_result, exit_bb);
4214           SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4215                            PHI_RESULT (phi));
4216           set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4217                                                             loop_vinfo, NULL));
4218           inner_phis.quick_push (phi);
4219           new_phis[i] = outer_phi;
4220           prev_phi_info = vinfo_for_stmt (outer_phi);
4221           while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4222             {
4223               phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
4224               new_result = copy_ssa_name (PHI_RESULT (phi));
4225               outer_phi = create_phi_node (new_result, exit_bb);
4226               SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4227                                PHI_RESULT (phi));
4228               set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
4229                                                         loop_vinfo, NULL));
4230               STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
4231               prev_phi_info = vinfo_for_stmt (outer_phi);
4232             }
4233         }
4234     }
4235
4236   exit_gsi = gsi_after_labels (exit_bb);
4237
4238   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4239          (i.e. when reduc_code is not available) and in the final adjustment
4240          code (if needed).  Also get the original scalar reduction variable as
4241          defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
4242          represents a reduction pattern), the tree-code and scalar-def are
4243          taken from the original stmt that the pattern-stmt (STMT) replaces.
4244          Otherwise (it is a regular reduction) - the tree-code and scalar-def
4245          are taken from STMT.  */
4246
4247   orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4248   if (!orig_stmt)
4249     {
4250       /* Regular reduction  */
4251       orig_stmt = stmt;
4252     }
4253   else
4254     {
4255       /* Reduction pattern  */
4256       stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
4257       gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
4258       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4259     }
4260
4261   code = gimple_assign_rhs_code (orig_stmt);
4262   /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4263      partial results are added and not subtracted.  */
4264   if (code == MINUS_EXPR)
4265     code = PLUS_EXPR;
4266
4267   scalar_dest = gimple_assign_lhs (orig_stmt);
4268   scalar_type = TREE_TYPE (scalar_dest);
4269   scalar_results.create (group_size);
4270   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4271   bitsize = TYPE_SIZE (scalar_type);
4272
4273   /* In case this is a reduction in an inner-loop while vectorizing an outer
4274      loop - we don't need to extract a single scalar result at the end of the
4275      inner-loop (unless it is double reduction, i.e., the use of reduction is
4276      outside the outer-loop).  The final vector of partial results will be used
4277      in the vectorized outer-loop, or reduced to a scalar result at the end of
4278      the outer-loop.  */
4279   if (nested_in_vect_loop && !double_reduc)
4280     goto vect_finalize_reduction;
4281
4282   /* SLP reduction without reduction chain, e.g.,
4283      # a1 = phi <a2, a0>
4284      # b1 = phi <b2, b0>
4285      a2 = operation (a1)
4286      b2 = operation (b1)  */
4287   slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
4288
4289   /* In case of reduction chain, e.g.,
4290      # a1 = phi <a3, a0>
4291      a2 = operation (a1)
4292      a3 = operation (a2),
4293
4294      we may end up with more than one vector result.  Here we reduce them to
4295      one vector.  */
4296   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4297     {
4298       tree first_vect = PHI_RESULT (new_phis[0]);
4299       tree tmp;
4300       gassign *new_vec_stmt = NULL;
4301
4302       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4303       for (k = 1; k < new_phis.length (); k++)
4304         {
4305           gimple next_phi = new_phis[k];
4306           tree second_vect = PHI_RESULT (next_phi);
4307
4308           tmp = build2 (code, vectype,  first_vect, second_vect);
4309           new_vec_stmt = gimple_build_assign (vec_dest, tmp);
4310           first_vect = make_ssa_name (vec_dest, new_vec_stmt);
4311           gimple_assign_set_lhs (new_vec_stmt, first_vect);
4312           gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4313         }
4314
4315       new_phi_result = first_vect;
4316       if (new_vec_stmt)
4317         {
4318           new_phis.truncate (0);
4319           new_phis.safe_push (new_vec_stmt);
4320         }
4321     }
4322   else
4323     new_phi_result = PHI_RESULT (new_phis[0]);
4324
4325   /* 2.3 Create the reduction code, using one of the three schemes described
4326          above. In SLP we simply need to extract all the elements from the
4327          vector (without reducing them), so we use scalar shifts.  */
4328   if (reduc_code != ERROR_MARK && !slp_reduc)
4329     {
4330       tree tmp;
4331       tree vec_elem_type;
4332
4333       /*** Case 1:  Create:
4334            v_out2 = reduc_expr <v_out1>  */
4335
4336       if (dump_enabled_p ())
4337         dump_printf_loc (MSG_NOTE, vect_location,
4338                          "Reduce using direct vector reduction.\n");
4339
4340       vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4341       if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4342         {
4343           tree tmp_dest =
4344               vect_create_destination_var (scalar_dest, vec_elem_type);
4345           tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
4346           epilog_stmt = gimple_build_assign (tmp_dest, tmp);
4347           new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4348           gimple_assign_set_lhs (epilog_stmt, new_temp);
4349           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4350
4351           tmp = build1 (NOP_EXPR, scalar_type, new_temp);
4352         }
4353       else
4354         tmp = build1 (reduc_code, scalar_type, new_phi_result);
4355       epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
4356       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4357       gimple_assign_set_lhs (epilog_stmt, new_temp);
4358       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4359       scalar_results.safe_push (new_temp);
4360     }
4361   else
4362     {
4363       bool reduce_with_shift = have_whole_vector_shift (mode);
4364       int element_bitsize = tree_to_uhwi (bitsize);
4365       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4366       tree vec_temp;
4367
4368       /* Regardless of whether we have a whole vector shift, if we're
4369          emulating the operation via tree-vect-generic, we don't want
4370          to use it.  Only the first round of the reduction is likely
4371          to still be profitable via emulation.  */
4372       /* ??? It might be better to emit a reduction tree code here, so that
4373          tree-vect-generic can expand the first round via bit tricks.  */
4374       if (!VECTOR_MODE_P (mode))
4375         reduce_with_shift = false;
4376       else
4377         {
4378           optab optab = optab_for_tree_code (code, vectype, optab_default);
4379           if (optab_handler (optab, mode) == CODE_FOR_nothing)
4380             reduce_with_shift = false;
4381         }
4382
4383       if (reduce_with_shift && !slp_reduc)
4384         {
4385           int nelements = vec_size_in_bits / element_bitsize;
4386           unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4387
4388           int elt_offset;
4389
4390           tree zero_vec = build_zero_cst (vectype);
4391           /*** Case 2: Create:
4392              for (offset = nelements/2; offset >= 1; offset/=2)
4393                 {
4394                   Create:  va' = vec_shift <va, offset>
4395                   Create:  va = vop <va, va'>
4396                 }  */
4397
4398           tree rhs;
4399
4400           if (dump_enabled_p ())
4401             dump_printf_loc (MSG_NOTE, vect_location,
4402                              "Reduce using vector shifts\n");
4403
4404           vec_dest = vect_create_destination_var (scalar_dest, vectype);
4405           new_temp = new_phi_result;
4406           for (elt_offset = nelements / 2;
4407                elt_offset >= 1;
4408                elt_offset /= 2)
4409             {
4410               calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4411               tree mask = vect_gen_perm_mask_any (vectype, sel);
4412               epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4413                                                  new_temp, zero_vec, mask);
4414               new_name = make_ssa_name (vec_dest, epilog_stmt);
4415               gimple_assign_set_lhs (epilog_stmt, new_name);
4416               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4417
4418               epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
4419                                                  new_temp);
4420               new_temp = make_ssa_name (vec_dest, epilog_stmt);
4421               gimple_assign_set_lhs (epilog_stmt, new_temp);
4422               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4423             }
4424
4425           /* 2.4  Extract the final scalar result.  Create:
4426              s_out3 = extract_field <v_out2, bitpos>  */
4427
4428           if (dump_enabled_p ())
4429             dump_printf_loc (MSG_NOTE, vect_location,
4430                              "extract scalar result\n");
4431
4432           rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4433                         bitsize, bitsize_zero_node);
4434           epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4435           new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4436           gimple_assign_set_lhs (epilog_stmt, new_temp);
4437           gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4438           scalar_results.safe_push (new_temp);
4439         }
4440       else
4441         {
4442           /*** Case 3: Create:
4443              s = extract_field <v_out2, 0>
4444              for (offset = element_size;
4445                   offset < vector_size;
4446                   offset += element_size;)
4447                {
4448                  Create:  s' = extract_field <v_out2, offset>
4449                  Create:  s = op <s, s'>  // For non SLP cases
4450                }  */
4451
4452           if (dump_enabled_p ())
4453             dump_printf_loc (MSG_NOTE, vect_location,
4454                              "Reduce using scalar code.\n");
4455
4456           vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4457           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4458             {
4459               int bit_offset;
4460               if (gimple_code (new_phi) == GIMPLE_PHI)
4461                 vec_temp = PHI_RESULT (new_phi);
4462               else
4463                 vec_temp = gimple_assign_lhs (new_phi);
4464               tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
4465                             bitsize_zero_node);
4466               epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4467               new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4468               gimple_assign_set_lhs (epilog_stmt, new_temp);
4469               gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4470
4471               /* In SLP we don't need to apply reduction operation, so we just
4472                  collect s' values in SCALAR_RESULTS.  */
4473               if (slp_reduc)
4474                 scalar_results.safe_push (new_temp);
4475
4476               for (bit_offset = element_bitsize;
4477                    bit_offset < vec_size_in_bits;
4478                    bit_offset += element_bitsize)
4479                 {
4480                   tree bitpos = bitsize_int (bit_offset);
4481                   tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
4482                                      bitsize, bitpos);
4483
4484                   epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
4485                   new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
4486                   gimple_assign_set_lhs (epilog_stmt, new_name);
4487                   gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4488
4489                   if (slp_reduc)
4490                     {
4491                       /* In SLP we don't need to apply reduction operation, so
4492                          we just collect s' values in SCALAR_RESULTS.  */
4493                       new_temp = new_name;
4494                       scalar_results.safe_push (new_name);
4495                     }
4496                   else
4497                     {
4498                       epilog_stmt = gimple_build_assign (new_scalar_dest, code,
4499                                                          new_name, new_temp);
4500                       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4501                       gimple_assign_set_lhs (epilog_stmt, new_temp);
4502                       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4503                     }
4504                 }
4505             }
4506
4507           /* The only case where we need to reduce scalar results in SLP, is
4508              unrolling.  If the size of SCALAR_RESULTS is greater than
4509              GROUP_SIZE, we reduce them combining elements modulo
4510              GROUP_SIZE.  */
4511           if (slp_reduc)
4512             {
4513               tree res, first_res, new_res;
4514               gimple new_stmt;
4515
4516               /* Reduce multiple scalar results in case of SLP unrolling.  */
4517               for (j = group_size; scalar_results.iterate (j, &res);
4518                    j++)
4519                 {
4520                   first_res = scalar_results[j % group_size];
4521                   new_stmt = gimple_build_assign (new_scalar_dest, code,
4522                                                   first_res, res);
4523                   new_res = make_ssa_name (new_scalar_dest, new_stmt);
4524                   gimple_assign_set_lhs (new_stmt, new_res);
4525                   gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
4526                   scalar_results[j % group_size] = new_res;
4527                 }
4528             }
4529           else
4530             /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
4531             scalar_results.safe_push (new_temp);
4532         }
4533     }
4534
4535 vect_finalize_reduction:
4536
4537   if (double_reduc)
4538     loop = loop->inner;
4539
4540   /* 2.5 Adjust the final result by the initial value of the reduction
4541          variable. (When such adjustment is not needed, then
4542          'adjustment_def' is zero).  For example, if code is PLUS we create:
4543          new_temp = loop_exit_def + adjustment_def  */
4544
4545   if (adjustment_def)
4546     {
4547       gcc_assert (!slp_reduc);
4548       if (nested_in_vect_loop)
4549         {
4550           new_phi = new_phis[0];
4551           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
4552           expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
4553           new_dest = vect_create_destination_var (scalar_dest, vectype);
4554         }
4555       else
4556         {
4557           new_temp = scalar_results[0];
4558           gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
4559           expr = build2 (code, scalar_type, new_temp, adjustment_def);
4560           new_dest = vect_create_destination_var (scalar_dest, scalar_type);
4561         }
4562
4563       epilog_stmt = gimple_build_assign (new_dest, expr);
4564       new_temp = make_ssa_name (new_dest, epilog_stmt);
4565       gimple_assign_set_lhs (epilog_stmt, new_temp);
4566       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4567       if (nested_in_vect_loop)
4568         {
4569           set_vinfo_for_stmt (epilog_stmt,
4570                               new_stmt_vec_info (epilog_stmt, loop_vinfo,
4571                                                  NULL));
4572           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
4573                 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
4574
4575           if (!double_reduc)
4576             scalar_results.quick_push (new_temp);
4577           else
4578             scalar_results[0] = new_temp;
4579         }
4580       else
4581         scalar_results[0] = new_temp;
4582
4583       new_phis[0] = epilog_stmt;
4584     }
4585
4586   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
4587           phis with new adjusted scalar results, i.e., replace use <s_out0>
4588           with use <s_out4>.
4589
4590      Transform:
4591         loop_exit:
4592           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4593           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4594           v_out2 = reduce <v_out1>
4595           s_out3 = extract_field <v_out2, 0>
4596           s_out4 = adjust_result <s_out3>
4597           use <s_out0>
4598           use <s_out0>
4599
4600      into:
4601
4602         loop_exit:
4603           s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
4604           v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
4605           v_out2 = reduce <v_out1>
4606           s_out3 = extract_field <v_out2, 0>
4607           s_out4 = adjust_result <s_out3>
4608           use <s_out4>
4609           use <s_out4> */
4610
4611
4612   /* In SLP reduction chain we reduce vector results into one vector if
4613      necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
4614      the last stmt in the reduction chain, since we are looking for the loop
4615      exit phi node.  */
4616   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4617     {
4618       gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
4619       /* Handle reduction patterns.  */
4620       if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
4621         dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
4622
4623       scalar_dest = gimple_assign_lhs (dest_stmt);
4624       group_size = 1;
4625     }
4626
4627   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
4628      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
4629      need to match SCALAR_RESULTS with corresponding statements.  The first
4630      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
4631      the first vector stmt, etc.
4632      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
4633   if (group_size > new_phis.length ())
4634     {
4635       ratio = group_size / new_phis.length ();
4636       gcc_assert (!(group_size % new_phis.length ()));
4637     }
4638   else
4639     ratio = 1;
4640
4641   for (k = 0; k < group_size; k++)
4642     {
4643       if (k % ratio == 0)
4644         {
4645           epilog_stmt = new_phis[k / ratio];
4646           reduction_phi = reduction_phis[k / ratio];
4647           if (double_reduc)
4648             inner_phi = inner_phis[k / ratio];
4649         }
4650
4651       if (slp_reduc)
4652         {
4653           gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
4654
4655           orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4656           /* SLP statements can't participate in patterns.  */
4657           gcc_assert (!orig_stmt);
4658           scalar_dest = gimple_assign_lhs (current_stmt);
4659         }
4660
4661       phis.create (3);
4662       /* Find the loop-closed-use at the loop exit of the original scalar
4663          result.  (The reduction result is expected to have two immediate uses -
4664          one at the latch block, and one at the loop exit).  */
4665       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4666         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4667             && !is_gimple_debug (USE_STMT (use_p)))
4668           phis.safe_push (USE_STMT (use_p));
4669
4670       /* While we expect to have found an exit_phi because of loop-closed-ssa
4671          form we can end up without one if the scalar cycle is dead.  */
4672
4673       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4674         {
4675           if (outer_loop)
4676             {
4677               stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
4678               gphi *vect_phi;
4679
4680               /* FORNOW. Currently not supporting the case that an inner-loop
4681                  reduction is not used in the outer-loop (but only outside the
4682                  outer-loop), unless it is double reduction.  */
4683               gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4684                            && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4685                           || double_reduc);
4686
4687               if (double_reduc)
4688                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
4689               else
4690                 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4691               if (!double_reduc
4692                   || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4693                       != vect_double_reduction_def)
4694                 continue;
4695
4696               /* Handle double reduction:
4697
4698                  stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
4699                  stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4700                  stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
4701                  stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
4702
4703                  At that point the regular reduction (stmt2 and stmt3) is
4704                  already vectorized, as well as the exit phi node, stmt4.
4705                  Here we vectorize the phi node of double reduction, stmt1, and
4706                  update all relevant statements.  */
4707
4708               /* Go through all the uses of s2 to find double reduction phi
4709                  node, i.e., stmt1 above.  */
4710               orig_name = PHI_RESULT (exit_phi);
4711               FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4712                 {
4713                   stmt_vec_info use_stmt_vinfo;
4714                   stmt_vec_info new_phi_vinfo;
4715                   tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4716                   basic_block bb = gimple_bb (use_stmt);
4717                   gimple use;
4718
4719                   /* Check that USE_STMT is really double reduction phi
4720                      node.  */
4721                   if (gimple_code (use_stmt) != GIMPLE_PHI
4722                       || gimple_phi_num_args (use_stmt) != 2
4723                       || bb->loop_father != outer_loop)
4724                     continue;
4725                   use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4726                   if (!use_stmt_vinfo
4727                       || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4728                           != vect_double_reduction_def)
4729                     continue;
4730
4731                   /* Create vector phi node for double reduction:
4732                      vs1 = phi <vs0, vs2>
4733                      vs1 was created previously in this function by a call to
4734                        vect_get_vec_def_for_operand and is stored in
4735                        vec_initial_def;
4736                      vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
4737                      vs0 is created here.  */
4738
4739                   /* Create vector phi node.  */
4740                   vect_phi = create_phi_node (vec_initial_def, bb);
4741                   new_phi_vinfo = new_stmt_vec_info (vect_phi,
4742                                     loop_vec_info_for_loop (outer_loop), NULL);
4743                   set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4744
4745                   /* Create vs0 - initial def of the double reduction phi.  */
4746                   preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4747                                              loop_preheader_edge (outer_loop));
4748                   init_def = get_initial_def_for_reduction (stmt,
4749                                                           preheader_arg, NULL);
4750                   vect_phi_init = vect_init_vector (use_stmt, init_def,
4751                                                     vectype, NULL);
4752
4753                   /* Update phi node arguments with vs0 and vs2.  */
4754                   add_phi_arg (vect_phi, vect_phi_init,
4755                                loop_preheader_edge (outer_loop),
4756                                UNKNOWN_LOCATION);
4757                   add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
4758                                loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
4759                   if (dump_enabled_p ())
4760                     {
4761                       dump_printf_loc (MSG_NOTE, vect_location,
4762                                        "created double reduction phi node: ");
4763                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
4764                       dump_printf (MSG_NOTE, "\n");
4765                     }
4766
4767                   vect_phi_res = PHI_RESULT (vect_phi);
4768
4769                   /* Replace the use, i.e., set the correct vs1 in the regular
4770                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
4771                      loop is redundant.  */
4772                   use = reduction_phi;
4773                   for (j = 0; j < ncopies; j++)
4774                     {
4775                       edge pr_edge = loop_preheader_edge (loop);
4776                       SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4777                       use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4778                     }
4779                 }
4780             }
4781         }
4782
4783       phis.release ();
4784       if (nested_in_vect_loop)
4785         {
4786           if (double_reduc)
4787             loop = outer_loop;
4788           else
4789             continue;
4790         }
4791
4792       phis.create (3);
4793       /* Find the loop-closed-use at the loop exit of the original scalar
4794          result.  (The reduction result is expected to have two immediate uses,
4795          one at the latch block, and one at the loop exit).  For double
4796          reductions we are looking for exit phis of the outer loop.  */
4797       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4798         {
4799           if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
4800             {
4801               if (!is_gimple_debug (USE_STMT (use_p)))
4802                 phis.safe_push (USE_STMT (use_p));
4803             }
4804           else
4805             {
4806               if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4807                 {
4808                   tree phi_res = PHI_RESULT (USE_STMT (use_p));
4809
4810                   FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4811                     {
4812                       if (!flow_bb_inside_loop_p (loop,
4813                                              gimple_bb (USE_STMT (phi_use_p)))
4814                           && !is_gimple_debug (USE_STMT (phi_use_p)))
4815                         phis.safe_push (USE_STMT (phi_use_p));
4816                     }
4817                 }
4818             }
4819         }
4820
4821       FOR_EACH_VEC_ELT (phis, i, exit_phi)
4822         {
4823           /* Replace the uses:  */
4824           orig_name = PHI_RESULT (exit_phi);
4825           scalar_result = scalar_results[k];
4826           FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4827             FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4828               SET_USE (use_p, scalar_result);
4829         }
4830
4831       phis.release ();
4832     }
4833 }
4834
4835
4836 /* Function vectorizable_reduction.
4837
4838    Check if STMT performs a reduction operation that can be vectorized.
4839    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4840    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4841    Return FALSE if not a vectorizable STMT, TRUE otherwise.
4842
4843    This function also handles reduction idioms (patterns) that have been
4844    recognized in advance during vect_pattern_recog.  In this case, STMT may be
4845    of this form:
4846      X = pattern_expr (arg0, arg1, ..., X)
4847    and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4848    sequence that had been detected and replaced by the pattern-stmt (STMT).
4849
4850    In some cases of reduction patterns, the type of the reduction variable X is
4851    different than the type of the other arguments of STMT.
4852    In such cases, the vectype that is used when transforming STMT into a vector
4853    stmt is different than the vectype that is used to determine the
4854    vectorization factor, because it consists of a different number of elements
4855    than the actual number of elements that are being operated upon in parallel.
4856
4857    For example, consider an accumulation of shorts into an int accumulator.
4858    On some targets it's possible to vectorize this pattern operating on 8
4859    shorts at a time (hence, the vectype for purposes of determining the
4860    vectorization factor should be V8HI); on the other hand, the vectype that
4861    is used to create the vector form is actually V4SI (the type of the result).
4862
4863    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4864    indicates what is the actual level of parallelism (V8HI in the example), so
4865    that the right vectorization factor would be derived.  This vectype
4866    corresponds to the type of arguments to the reduction stmt, and should *NOT*
4867    be used to create the vectorized stmt.  The right vectype for the vectorized
4868    stmt is obtained from the type of the result X:
4869         get_vectype_for_scalar_type (TREE_TYPE (X))
4870
4871    This means that, contrary to "regular" reductions (or "regular" stmts in
4872    general), the following equation:
4873       STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4874    does *NOT* necessarily hold for reduction patterns.  */
4875
4876 bool
4877 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
4878                         gimple *vec_stmt, slp_tree slp_node)
4879 {
4880   tree vec_dest;
4881   tree scalar_dest;
4882   tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4883   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4884   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4885   tree vectype_in = NULL_TREE;
4886   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4887   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4888   enum tree_code code, orig_code, epilog_reduc_code;
4889   machine_mode vec_mode;
4890   int op_type;
4891   optab optab, reduc_optab;
4892   tree new_temp = NULL_TREE;
4893   tree def;
4894   gimple def_stmt;
4895   enum vect_def_type dt;
4896   gphi *new_phi = NULL;
4897   tree scalar_type;
4898   bool is_simple_use;
4899   gimple orig_stmt;
4900   stmt_vec_info orig_stmt_info;
4901   tree expr = NULL_TREE;
4902   int i;
4903   int ncopies;
4904   int epilog_copies;
4905   stmt_vec_info prev_stmt_info, prev_phi_info;
4906   bool single_defuse_cycle = false;
4907   tree reduc_def = NULL_TREE;
4908   gimple new_stmt = NULL;
4909   int j;
4910   tree ops[3];
4911   bool nested_cycle = false, found_nested_cycle_def = false;
4912   gimple reduc_def_stmt = NULL;
4913   bool double_reduc = false, dummy;
4914   basic_block def_bb;
4915   struct loop * def_stmt_loop, *outer_loop = NULL;
4916   tree def_arg;
4917   gimple def_arg_stmt;
4918   auto_vec<tree> vec_oprnds0;
4919   auto_vec<tree> vec_oprnds1;
4920   auto_vec<tree> vect_defs;
4921   auto_vec<gimple> phis;
4922   int vec_num;
4923   tree def0, def1, tem, op0, op1 = NULL_TREE;
4924   bool first_p = true;
4925
4926   /* In case of reduction chain we switch to the first stmt in the chain, but
4927      we don't update STMT_INFO, since only the last stmt is marked as reduction
4928      and has reduction properties.  */
4929   if (GROUP_FIRST_ELEMENT (stmt_info)
4930       && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
4931     {
4932       stmt = GROUP_FIRST_ELEMENT (stmt_info);
4933       first_p = false;
4934     }
4935
4936   if (nested_in_vect_loop_p (loop, stmt))
4937     {
4938       outer_loop = loop;
4939       loop = loop->inner;
4940       nested_cycle = true;
4941     }
4942
4943   /* 1. Is vectorizable reduction?  */
4944   /* Not supportable if the reduction variable is used in the loop, unless
4945      it's a reduction chain.  */
4946   if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4947       && !GROUP_FIRST_ELEMENT (stmt_info))
4948     return false;
4949
4950   /* Reductions that are not used even in an enclosing outer-loop,
4951      are expected to be "live" (used out of the loop).  */
4952   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4953       && !STMT_VINFO_LIVE_P (stmt_info))
4954     return false;
4955
4956   /* Make sure it was already recognized as a reduction computation.  */
4957   if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
4958       && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
4959     return false;
4960
4961   /* 2. Has this been recognized as a reduction pattern?
4962
4963      Check if STMT represents a pattern that has been recognized
4964      in earlier analysis stages.  For stmts that represent a pattern,
4965      the STMT_VINFO_RELATED_STMT field records the last stmt in
4966      the original sequence that constitutes the pattern.  */
4967
4968   orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
4969   if (orig_stmt)
4970     {
4971       orig_stmt_info = vinfo_for_stmt (orig_stmt);
4972       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4973       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4974     }
4975
4976   /* 3. Check the operands of the operation.  The first operands are defined
4977         inside the loop body. The last operand is the reduction variable,
4978         which is defined by the loop-header-phi.  */
4979
4980   gcc_assert (is_gimple_assign (stmt));
4981
4982   /* Flatten RHS.  */
4983   switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4984     {
4985     case GIMPLE_SINGLE_RHS:
4986       op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4987       if (op_type == ternary_op)
4988         {
4989           tree rhs = gimple_assign_rhs1 (stmt);
4990           ops[0] = TREE_OPERAND (rhs, 0);
4991           ops[1] = TREE_OPERAND (rhs, 1);
4992           ops[2] = TREE_OPERAND (rhs, 2);
4993           code = TREE_CODE (rhs);
4994         }
4995       else
4996         return false;
4997       break;
4998
4999     case GIMPLE_BINARY_RHS:
5000       code = gimple_assign_rhs_code (stmt);
5001       op_type = TREE_CODE_LENGTH (code);
5002       gcc_assert (op_type == binary_op);
5003       ops[0] = gimple_assign_rhs1 (stmt);
5004       ops[1] = gimple_assign_rhs2 (stmt);
5005       break;
5006
5007     case GIMPLE_TERNARY_RHS:
5008       code = gimple_assign_rhs_code (stmt);
5009       op_type = TREE_CODE_LENGTH (code);
5010       gcc_assert (op_type == ternary_op);
5011       ops[0] = gimple_assign_rhs1 (stmt);
5012       ops[1] = gimple_assign_rhs2 (stmt);
5013       ops[2] = gimple_assign_rhs3 (stmt);
5014       break;
5015
5016     case GIMPLE_UNARY_RHS:
5017       return false;
5018
5019     default:
5020       gcc_unreachable ();
5021     }
5022   /* The default is that the reduction variable is the last in statement.  */
5023   int reduc_index = op_type - 1;
5024
5025   if (code == COND_EXPR && slp_node)
5026     return false;
5027
5028   scalar_dest = gimple_assign_lhs (stmt);
5029   scalar_type = TREE_TYPE (scalar_dest);
5030   if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
5031       && !SCALAR_FLOAT_TYPE_P (scalar_type))
5032     return false;
5033
5034   /* Do not try to vectorize bit-precision reductions.  */
5035   if ((TYPE_PRECISION (scalar_type)
5036        != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
5037     return false;
5038
5039   /* All uses but the last are expected to be defined in the loop.
5040      The last use is the reduction variable.  In case of nested cycle this
5041      assumption is not true: we use reduc_index to record the index of the
5042      reduction variable.  */
5043   for (i = 0; i < op_type - 1; i++)
5044     {
5045       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
5046       if (i == 0 && code == COND_EXPR)
5047         continue;
5048
5049       is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5050                                             &def_stmt, &def, &dt, &tem);
5051       if (!vectype_in)
5052         vectype_in = tem;
5053       gcc_assert (is_simple_use);
5054
5055       if (dt != vect_internal_def
5056           && dt != vect_external_def
5057           && dt != vect_constant_def
5058           && dt != vect_induction_def
5059           && !(dt == vect_nested_cycle && nested_cycle))
5060         return false;
5061
5062       if (dt == vect_nested_cycle)
5063         {
5064           found_nested_cycle_def = true;
5065           reduc_def_stmt = def_stmt;
5066           reduc_index = i;
5067         }
5068     }
5069
5070   is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
5071                                         &def_stmt, &def, &dt, &tem);
5072   if (!vectype_in)
5073     vectype_in = tem;
5074   gcc_assert (is_simple_use);
5075   if (!found_nested_cycle_def)
5076     reduc_def_stmt = def_stmt;
5077
5078   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
5079     return false;
5080
5081   if (!(dt == vect_reduction_def
5082         || dt == vect_nested_cycle
5083         || ((dt == vect_internal_def || dt == vect_external_def
5084              || dt == vect_constant_def || dt == vect_induction_def)
5085             && nested_cycle && found_nested_cycle_def)))
5086     {
5087       /* For pattern recognized stmts, orig_stmt might be a reduction,
5088          but some helper statements for the pattern might not, or
5089          might be COND_EXPRs with reduction uses in the condition.  */
5090       gcc_assert (orig_stmt);
5091       return false;
5092     }
5093
5094   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
5095                                          !nested_cycle, &dummy, false);
5096   if (orig_stmt)
5097     gcc_assert (tmp == orig_stmt
5098                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
5099   else
5100     /* We changed STMT to be the first stmt in reduction chain, hence we
5101        check that in this case the first element in the chain is STMT.  */
5102     gcc_assert (stmt == tmp
5103                 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
5104
5105   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
5106     return false;
5107
5108   if (slp_node || PURE_SLP_STMT (stmt_info))
5109     ncopies = 1;
5110   else
5111     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5112                / TYPE_VECTOR_SUBPARTS (vectype_in));
5113
5114   gcc_assert (ncopies >= 1);
5115
5116   vec_mode = TYPE_MODE (vectype_in);
5117
5118   if (code == COND_EXPR)
5119     {
5120       if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
5121         {
5122           if (dump_enabled_p ())
5123             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5124                              "unsupported condition in reduction\n");
5125
5126           return false;
5127         }
5128     }
5129   else
5130     {
5131       /* 4. Supportable by target?  */
5132
5133       if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5134           || code == LROTATE_EXPR || code == RROTATE_EXPR)
5135         {
5136           /* Shifts and rotates are only supported by vectorizable_shifts,
5137              not vectorizable_reduction.  */
5138           if (dump_enabled_p ())
5139             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5140                              "unsupported shift or rotation.\n");
5141           return false;
5142         }
5143
5144       /* 4.1. check support for the operation in the loop  */
5145       optab = optab_for_tree_code (code, vectype_in, optab_default);
5146       if (!optab)
5147         {
5148           if (dump_enabled_p ())
5149             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150                              "no optab.\n");
5151
5152           return false;
5153         }
5154
5155       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5156         {
5157           if (dump_enabled_p ())
5158             dump_printf (MSG_NOTE, "op not supported by target.\n");
5159
5160           if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5161               || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5162                   < vect_min_worthwhile_factor (code))
5163             return false;
5164
5165           if (dump_enabled_p ())
5166             dump_printf (MSG_NOTE, "proceeding using word mode.\n");
5167         }
5168
5169       /* Worthwhile without SIMD support?  */
5170       if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
5171           && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5172              < vect_min_worthwhile_factor (code))
5173         {
5174           if (dump_enabled_p ())
5175             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5176                              "not worthwhile without SIMD support.\n");
5177
5178           return false;
5179         }
5180     }
5181
5182   /* 4.2. Check support for the epilog operation.
5183
5184           If STMT represents a reduction pattern, then the type of the
5185           reduction variable may be different than the type of the rest
5186           of the arguments.  For example, consider the case of accumulation
5187           of shorts into an int accumulator; The original code:
5188                         S1: int_a = (int) short_a;
5189           orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
5190
5191           was replaced with:
5192                         STMT: int_acc = widen_sum <short_a, int_acc>
5193
5194           This means that:
5195           1. The tree-code that is used to create the vector operation in the
5196              epilog code (that reduces the partial results) is not the
5197              tree-code of STMT, but is rather the tree-code of the original
5198              stmt from the pattern that STMT is replacing.  I.e, in the example
5199              above we want to use 'widen_sum' in the loop, but 'plus' in the
5200              epilog.
5201           2. The type (mode) we use to check available target support
5202              for the vector operation to be created in the *epilog*, is
5203              determined by the type of the reduction variable (in the example
5204              above we'd check this: optab_handler (plus_optab, vect_int_mode)).
5205              However the type (mode) we use to check available target support
5206              for the vector operation to be created *inside the loop*, is
5207              determined by the type of the other arguments to STMT (in the
5208              example we'd check this: optab_handler (widen_sum_optab,
5209              vect_short_mode)).
5210
5211           This is contrary to "regular" reductions, in which the types of all
5212           the arguments are the same as the type of the reduction variable.
5213           For "regular" reductions we can therefore use the same vector type
5214           (and also the same tree-code) when generating the epilog code and
5215           when generating the code inside the loop.  */
5216
5217   if (orig_stmt)
5218     {
5219       /* This is a reduction pattern: get the vectype from the type of the
5220          reduction variable, and get the tree-code from orig_stmt.  */
5221       orig_code = gimple_assign_rhs_code (orig_stmt);
5222       gcc_assert (vectype_out);
5223       vec_mode = TYPE_MODE (vectype_out);
5224     }
5225   else
5226     {
5227       /* Regular reduction: use the same vectype and tree-code as used for
5228          the vector code inside the loop can be used for the epilog code. */
5229       orig_code = code;
5230     }
5231
5232   if (nested_cycle)
5233     {
5234       def_bb = gimple_bb (reduc_def_stmt);
5235       def_stmt_loop = def_bb->loop_father;
5236       def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5237                                        loop_preheader_edge (def_stmt_loop));
5238       if (TREE_CODE (def_arg) == SSA_NAME
5239           && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5240           && gimple_code (def_arg_stmt) == GIMPLE_PHI
5241           && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5242           && vinfo_for_stmt (def_arg_stmt)
5243           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5244               == vect_double_reduction_def)
5245         double_reduc = true;
5246     }
5247
5248   epilog_reduc_code = ERROR_MARK;
5249   if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5250     {
5251       reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
5252                                          optab_default);
5253       if (!reduc_optab)
5254         {
5255           if (dump_enabled_p ())
5256             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257                              "no optab for reduction.\n");
5258
5259           epilog_reduc_code = ERROR_MARK;
5260         }
5261       else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
5262         {
5263           optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5264           if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5265             {
5266               if (dump_enabled_p ())
5267                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5268                                  "reduc op not supported by target.\n");
5269
5270               epilog_reduc_code = ERROR_MARK;
5271             }
5272         }
5273     }
5274   else
5275     {
5276       if (!nested_cycle || double_reduc)
5277         {
5278           if (dump_enabled_p ())
5279             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5280                              "no reduc code for scalar code.\n");
5281
5282           return false;
5283         }
5284     }
5285
5286   if (double_reduc && ncopies > 1)
5287     {
5288       if (dump_enabled_p ())
5289         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5290                          "multiple types in double reduction\n");
5291
5292       return false;
5293     }
5294
5295   /* In case of widening multiplication by a constant, we update the type
5296      of the constant to be the type of the other operand.  We check that the
5297      constant fits the type in the pattern recognition pass.  */
5298   if (code == DOT_PROD_EXPR
5299       && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5300     {
5301       if (TREE_CODE (ops[0]) == INTEGER_CST)
5302         ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5303       else if (TREE_CODE (ops[1]) == INTEGER_CST)
5304         ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5305       else
5306         {
5307           if (dump_enabled_p ())
5308             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5309                              "invalid types in dot-prod\n");
5310
5311           return false;
5312         }
5313     }
5314
5315   if (!vec_stmt) /* transformation not required.  */
5316     {
5317       if (first_p
5318           && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
5319                                          reduc_index))
5320         return false;
5321       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5322       return true;
5323     }
5324
5325   /** Transform.  **/
5326
5327   if (dump_enabled_p ())
5328     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
5329
5330   /* FORNOW: Multiple types are not supported for condition.  */
5331   if (code == COND_EXPR)
5332     gcc_assert (ncopies == 1);
5333
5334   /* Create the destination vector  */
5335   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5336
5337   /* In case the vectorization factor (VF) is bigger than the number
5338      of elements that we can fit in a vectype (nunits), we have to generate
5339      more than one vector stmt - i.e - we need to "unroll" the
5340      vector stmt by a factor VF/nunits.  For more details see documentation
5341      in vectorizable_operation.  */
5342
5343   /* If the reduction is used in an outer loop we need to generate
5344      VF intermediate results, like so (e.g. for ncopies=2):
5345         r0 = phi (init, r0)
5346         r1 = phi (init, r1)
5347         r0 = x0 + r0;
5348         r1 = x1 + r1;
5349     (i.e. we generate VF results in 2 registers).
5350     In this case we have a separate def-use cycle for each copy, and therefore
5351     for each copy we get the vector def for the reduction variable from the
5352     respective phi node created for this copy.
5353
5354     Otherwise (the reduction is unused in the loop nest), we can combine
5355     together intermediate results, like so (e.g. for ncopies=2):
5356         r = phi (init, r)
5357         r = x0 + r;
5358         r = x1 + r;
5359    (i.e. we generate VF/2 results in a single register).
5360    In this case for each copy we get the vector def for the reduction variable
5361    from the vectorized reduction operation generated in the previous iteration.
5362   */
5363
5364   if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
5365     {
5366       single_defuse_cycle = true;
5367       epilog_copies = 1;
5368     }
5369   else
5370     epilog_copies = ncopies;
5371
5372   prev_stmt_info = NULL;
5373   prev_phi_info = NULL;
5374   if (slp_node)
5375     vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5376   else
5377     {
5378       vec_num = 1;
5379       vec_oprnds0.create (1);
5380       if (op_type == ternary_op)
5381         vec_oprnds1.create (1);
5382     }
5383
5384   phis.create (vec_num);
5385   vect_defs.create (vec_num);
5386   if (!slp_node)
5387     vect_defs.quick_push (NULL_TREE);
5388
5389   for (j = 0; j < ncopies; j++)
5390     {
5391       if (j == 0 || !single_defuse_cycle)
5392         {
5393           for (i = 0; i < vec_num; i++)
5394             {
5395               /* Create the reduction-phi that defines the reduction
5396                  operand.  */
5397               new_phi = create_phi_node (vec_dest, loop->header);
5398               set_vinfo_for_stmt (new_phi,
5399                                   new_stmt_vec_info (new_phi, loop_vinfo,
5400                                                      NULL));
5401                if (j == 0 || slp_node)
5402                  phis.quick_push (new_phi);
5403             }
5404         }
5405
5406       if (code == COND_EXPR)
5407         {
5408           gcc_assert (!slp_node);
5409           vectorizable_condition (stmt, gsi, vec_stmt,
5410                                   PHI_RESULT (phis[0]),
5411                                   reduc_index, NULL);
5412           /* Multiple types are not supported for condition.  */
5413           break;
5414         }
5415
5416       /* Handle uses.  */
5417       if (j == 0)
5418         {
5419           op0 = ops[!reduc_index];
5420           if (op_type == ternary_op)
5421             {
5422               if (reduc_index == 0)
5423                 op1 = ops[2];
5424               else
5425                 op1 = ops[1];
5426             }
5427
5428           if (slp_node)
5429             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5430                                slp_node, -1);
5431           else
5432             {
5433               loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5434                                                             stmt, NULL);
5435               vec_oprnds0.quick_push (loop_vec_def0);
5436               if (op_type == ternary_op)
5437                {
5438                  loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5439                                                                NULL);
5440                  vec_oprnds1.quick_push (loop_vec_def1);
5441                }
5442             }
5443         }
5444       else
5445         {
5446           if (!slp_node)
5447             {
5448               enum vect_def_type dt;
5449               gimple dummy_stmt;
5450               tree dummy;
5451
5452               vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
5453                                   &dummy_stmt, &dummy, &dt);
5454               loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5455                                                               loop_vec_def0);
5456               vec_oprnds0[0] = loop_vec_def0;
5457               if (op_type == ternary_op)
5458                 {
5459                   vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
5460                                       &dummy, &dt);
5461                   loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5462                                                                 loop_vec_def1);
5463                   vec_oprnds1[0] = loop_vec_def1;
5464                 }
5465             }
5466
5467           if (single_defuse_cycle)
5468             reduc_def = gimple_assign_lhs (new_stmt);
5469
5470           STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
5471         }
5472
5473       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5474         {
5475           if (slp_node)
5476             reduc_def = PHI_RESULT (phis[i]);
5477           else
5478             {
5479               if (!single_defuse_cycle || j == 0)
5480                 reduc_def = PHI_RESULT (new_phi);
5481             }
5482
5483           def1 = ((op_type == ternary_op)
5484                   ? vec_oprnds1[i] : NULL);
5485           if (op_type == binary_op)
5486             {
5487               if (reduc_index == 0)
5488                 expr = build2 (code, vectype_out, reduc_def, def0);
5489               else
5490                 expr = build2 (code, vectype_out, def0, reduc_def);
5491             }
5492           else
5493             {
5494               if (reduc_index == 0)
5495                 expr = build3 (code, vectype_out, reduc_def, def0, def1);
5496               else
5497                 {
5498                   if (reduc_index == 1)
5499                     expr = build3 (code, vectype_out, def0, reduc_def, def1);
5500                   else
5501                     expr = build3 (code, vectype_out, def0, def1, reduc_def);
5502                 }
5503             }
5504
5505           new_stmt = gimple_build_assign (vec_dest, expr);
5506           new_temp = make_ssa_name (vec_dest, new_stmt);
5507           gimple_assign_set_lhs (new_stmt, new_temp);
5508           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5509
5510           if (slp_node)
5511             {
5512               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5513               vect_defs.quick_push (new_temp);
5514             }
5515           else
5516             vect_defs[0] = new_temp;
5517         }
5518
5519       if (slp_node)
5520         continue;
5521
5522       if (j == 0)
5523         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5524       else
5525         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5526
5527       prev_stmt_info = vinfo_for_stmt (new_stmt);
5528       prev_phi_info = vinfo_for_stmt (new_phi);
5529     }
5530
5531   /* Finalize the reduction-phi (set its arguments) and create the
5532      epilog reduction code.  */
5533   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5534     {
5535       new_temp = gimple_assign_lhs (*vec_stmt);
5536       vect_defs[0] = new_temp;
5537     }
5538
5539   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5540                                     epilog_reduc_code, phis, reduc_index,
5541                                     double_reduc, slp_node);
5542
5543   return true;
5544 }
5545
5546 /* Function vect_min_worthwhile_factor.
5547
5548    For a loop where we could vectorize the operation indicated by CODE,
5549    return the minimum vectorization factor that makes it worthwhile
5550    to use generic vectors.  */
5551 int
5552 vect_min_worthwhile_factor (enum tree_code code)
5553 {
5554   switch (code)
5555     {
5556     case PLUS_EXPR:
5557     case MINUS_EXPR:
5558     case NEGATE_EXPR:
5559       return 4;
5560
5561     case BIT_AND_EXPR:
5562     case BIT_IOR_EXPR:
5563     case BIT_XOR_EXPR:
5564     case BIT_NOT_EXPR:
5565       return 2;
5566
5567     default:
5568       return INT_MAX;
5569     }
5570 }
5571
5572
5573 /* Function vectorizable_induction
5574
5575    Check if PHI performs an induction computation that can be vectorized.
5576    If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5577    phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5578    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5579
5580 bool
5581 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5582                         gimple *vec_stmt)
5583 {
5584   stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5585   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5586   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5587   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5588   int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5589   int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5590   tree vec_def;
5591
5592   gcc_assert (ncopies >= 1);
5593   /* FORNOW. These restrictions should be relaxed.  */
5594   if (nested_in_vect_loop_p (loop, phi))
5595     {
5596       imm_use_iterator imm_iter;
5597       use_operand_p use_p;
5598       gimple exit_phi;
5599       edge latch_e;
5600       tree loop_arg;
5601
5602       if (ncopies > 1)
5603         {
5604           if (dump_enabled_p ())
5605             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5606                              "multiple types in nested loop.\n");
5607           return false;
5608         }
5609
5610       exit_phi = NULL;
5611       latch_e = loop_latch_edge (loop->inner);
5612       loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5613       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5614         {
5615           gimple use_stmt = USE_STMT (use_p);
5616           if (is_gimple_debug (use_stmt))
5617             continue;
5618
5619           if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
5620             {
5621               exit_phi = use_stmt;
5622               break;
5623             }
5624         }
5625       if (exit_phi)
5626         {
5627           stmt_vec_info exit_phi_vinfo  = vinfo_for_stmt (exit_phi);
5628           if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5629                 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5630             {
5631               if (dump_enabled_p ())
5632                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5633                                  "inner-loop induction only used outside "
5634                                  "of the outer vectorized loop.\n");
5635               return false;
5636             }
5637         }
5638     }
5639
5640   if (!STMT_VINFO_RELEVANT_P (stmt_info))
5641     return false;
5642
5643   /* FORNOW: SLP not supported.  */
5644   if (STMT_SLP_TYPE (stmt_info))
5645     return false;
5646
5647   gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5648
5649   if (gimple_code (phi) != GIMPLE_PHI)
5650     return false;
5651
5652   if (!vec_stmt) /* transformation not required.  */
5653     {
5654       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
5655       if (dump_enabled_p ())
5656         dump_printf_loc (MSG_NOTE, vect_location,
5657                          "=== vectorizable_induction ===\n");
5658       vect_model_induction_cost (stmt_info, ncopies);
5659       return true;
5660     }
5661
5662   /** Transform.  **/
5663
5664   if (dump_enabled_p ())
5665     dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
5666
5667   vec_def = get_initial_def_for_induction (phi);
5668   *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5669   return true;
5670 }
5671
5672 /* Function vectorizable_live_operation.
5673
5674    STMT computes a value that is used outside the loop.  Check if
5675    it can be supported.  */
5676
5677 bool
5678 vectorizable_live_operation (gimple stmt,
5679                              gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5680                              gimple *vec_stmt)
5681 {
5682   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5683   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5684   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5685   int i;
5686   int op_type;
5687   tree op;
5688   tree def;
5689   gimple def_stmt;
5690   enum vect_def_type dt;
5691   enum tree_code code;
5692   enum gimple_rhs_class rhs_class;
5693
5694   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5695
5696   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5697     return false;
5698
5699   if (!is_gimple_assign (stmt))
5700     {
5701       if (gimple_call_internal_p (stmt)
5702           && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5703           && gimple_call_lhs (stmt)
5704           && loop->simduid
5705           && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5706           && loop->simduid
5707              == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5708         {
5709           edge e = single_exit (loop);
5710           basic_block merge_bb = e->dest;
5711           imm_use_iterator imm_iter;
5712           use_operand_p use_p;
5713           tree lhs = gimple_call_lhs (stmt);
5714
5715           FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5716             {
5717               gimple use_stmt = USE_STMT (use_p);
5718               if (gimple_code (use_stmt) == GIMPLE_PHI
5719                   && gimple_bb (use_stmt) == merge_bb)
5720                 {
5721                   if (vec_stmt)
5722                     {
5723                       tree vfm1
5724                         = build_int_cst (unsigned_type_node,
5725                                          loop_vinfo->vectorization_factor - 1);
5726                       SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5727                     }
5728                   return true;
5729                 }
5730             }
5731         }
5732
5733       return false;
5734     }
5735
5736   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5737     return false;
5738
5739   /* FORNOW. CHECKME. */
5740   if (nested_in_vect_loop_p (loop, stmt))
5741     return false;
5742
5743   code = gimple_assign_rhs_code (stmt);
5744   op_type = TREE_CODE_LENGTH (code);
5745   rhs_class = get_gimple_rhs_class (code);
5746   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5747   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5748
5749   /* FORNOW: support only if all uses are invariant.  This means
5750      that the scalar operations can remain in place, unvectorized.
5751      The original last scalar value that they compute will be used.  */
5752
5753   for (i = 0; i < op_type; i++)
5754     {
5755       if (rhs_class == GIMPLE_SINGLE_RHS)
5756         op = TREE_OPERAND (gimple_op (stmt, 1), i);
5757       else
5758         op = gimple_op (stmt, i + 1);
5759       if (op
5760           && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5761                                   &dt))
5762         {
5763           if (dump_enabled_p ())
5764             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765                              "use not simple.\n");
5766           return false;
5767         }
5768
5769       if (dt != vect_external_def && dt != vect_constant_def)
5770         return false;
5771     }
5772
5773   /* No transformation is required for the cases we currently support.  */
5774   return true;
5775 }
5776
5777 /* Kill any debug uses outside LOOP of SSA names defined in STMT.  */
5778
5779 static void
5780 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5781 {
5782   ssa_op_iter op_iter;
5783   imm_use_iterator imm_iter;
5784   def_operand_p def_p;
5785   gimple ustmt;
5786
5787   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5788     {
5789       FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5790         {
5791           basic_block bb;
5792
5793           if (!is_gimple_debug (ustmt))
5794             continue;
5795
5796           bb = gimple_bb (ustmt);
5797
5798           if (!flow_bb_inside_loop_p (loop, bb))
5799             {
5800               if (gimple_debug_bind_p (ustmt))
5801                 {
5802                   if (dump_enabled_p ())
5803                     dump_printf_loc (MSG_NOTE, vect_location,
5804                                      "killing debug use\n");
5805
5806                   gimple_debug_bind_reset_value (ustmt);
5807                   update_stmt (ustmt);
5808                 }
5809               else
5810                 gcc_unreachable ();
5811             }
5812         }
5813     }
5814 }
5815
5816
5817 /* This function builds ni_name = number of iterations.  Statements
5818    are emitted on the loop preheader edge.  */
5819
5820 static tree
5821 vect_build_loop_niters (loop_vec_info loop_vinfo)
5822 {
5823   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5824   if (TREE_CODE (ni) == INTEGER_CST)
5825     return ni;
5826   else
5827     {
5828       tree ni_name, var;
5829       gimple_seq stmts = NULL;
5830       edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5831
5832       var = create_tmp_var (TREE_TYPE (ni), "niters");
5833       ni_name = force_gimple_operand (ni, &stmts, false, var);
5834       if (stmts)
5835         gsi_insert_seq_on_edge_immediate (pe, stmts);
5836
5837       return ni_name;
5838     }
5839 }
5840
5841
5842 /* This function generates the following statements:
5843
5844    ni_name = number of iterations loop executes
5845    ratio = ni_name / vf
5846    ratio_mult_vf_name = ratio * vf
5847
5848    and places them on the loop preheader edge.  */
5849
5850 static void
5851 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5852                                  tree ni_name,
5853                                  tree *ratio_mult_vf_name_ptr,
5854                                  tree *ratio_name_ptr)
5855 {
5856   tree ni_minus_gap_name;
5857   tree var;
5858   tree ratio_name;
5859   tree ratio_mult_vf_name;
5860   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5861   edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
5862   tree log_vf;
5863
5864   log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
5865
5866   /* If epilogue loop is required because of data accesses with gaps, we
5867      subtract one iteration from the total number of iterations here for
5868      correct calculation of RATIO.  */
5869   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5870     {
5871       ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5872                                        ni_name,
5873                                        build_one_cst (TREE_TYPE (ni_name)));
5874       if (!is_gimple_val (ni_minus_gap_name))
5875         {
5876           var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
5877           gimple stmts = NULL;
5878           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
5879                                                     true, var);
5880           gsi_insert_seq_on_edge_immediate (pe, stmts);
5881         }
5882     }
5883   else
5884     ni_minus_gap_name = ni_name;
5885
5886   /* Create: ratio = ni >> log2(vf) */
5887   /* ???  As we have ni == number of latch executions + 1, ni could
5888      have overflown to zero.  So avoid computing ratio based on ni
5889      but compute it using the fact that we know ratio will be at least
5890      one, thus via (ni - vf) >> log2(vf) + 1.  */
5891   ratio_name
5892     = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
5893                    fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
5894                                 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5895                                              ni_minus_gap_name,
5896                                              build_int_cst
5897                                                (TREE_TYPE (ni_name), vf)),
5898                                 log_vf),
5899                    build_int_cst (TREE_TYPE (ni_name), 1));
5900   if (!is_gimple_val (ratio_name))
5901     {
5902       var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
5903       gimple stmts = NULL;
5904       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
5905       gsi_insert_seq_on_edge_immediate (pe, stmts);
5906     }
5907   *ratio_name_ptr = ratio_name;
5908
5909   /* Create: ratio_mult_vf = ratio << log2 (vf).  */
5910
5911   if (ratio_mult_vf_name_ptr)
5912     {
5913       ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5914                                         ratio_name, log_vf);
5915       if (!is_gimple_val (ratio_mult_vf_name))
5916         {
5917           var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
5918           gimple stmts = NULL;
5919           ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
5920                                                      true, var);
5921           gsi_insert_seq_on_edge_immediate (pe, stmts);
5922         }
5923       *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5924     }
5925
5926   return;
5927 }
5928
5929
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */
5935
5936 void
5937 vect_transform_loop (loop_vec_info loop_vinfo)
5938 {
5939   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5940   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5941   int nbbs = loop->num_nodes;
5942   int i;
5943   tree ratio = NULL;
5944   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5945   bool grouped_store;
5946   bool slp_scheduled = false;
5947   gimple stmt, pattern_stmt;
5948   gimple_seq pattern_def_seq = NULL;
5949   gimple_stmt_iterator pattern_def_si = gsi_none ();
5950   bool transform_pattern_stmt = false;
5951   bool check_profitability = false;
5952   int th;
5953   /* Record number of iterations before we started tampering with the profile. */
5954   gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
5955
5956   if (dump_enabled_p ())
5957     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
5958
5959   /* If profile is inprecise, we have chance to fix it up.  */
5960   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5961     expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5962
5963   /* Use the more conservative vectorization threshold.  If the number
5964      of iterations is constant assume the cost check has been performed
5965      by our caller.  If the threshold makes all loops profitable that
5966      run at least the vectorization factor number of times checking
5967      is pointless, too.  */
5968   th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
5969   if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5970       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5971     {
5972       if (dump_enabled_p ())
5973         dump_printf_loc (MSG_NOTE, vect_location,
5974                          "Profitability threshold is %d loop iterations.\n",
5975                          th);
5976       check_profitability = true;
5977     }
5978
5979   /* Version the loop first, if required, so the profitability check
5980      comes first.  */
5981
5982   if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5983       || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
5984     {
5985       vect_loop_versioning (loop_vinfo, th, check_profitability);
5986       check_profitability = false;
5987     }
5988
5989   tree ni_name = vect_build_loop_niters (loop_vinfo);
5990   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
5991
5992   /* Peel the loop if there are data refs with unknown alignment.
5993      Only one data ref with unknown store is allowed.  */
5994
5995   if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
5996     {
5997       vect_do_peeling_for_alignment (loop_vinfo, ni_name,
5998                                      th, check_profitability);
5999       check_profitability = false;
6000       /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
6001          be re-computed.  */
6002       ni_name = NULL_TREE;
6003     }
6004
6005   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
6006      compile time constant), or it is a constant that doesn't divide by the
6007      vectorization factor, then an epilog loop needs to be created.
6008      We therefore duplicate the loop: the original loop will be vectorized,
6009      and will compute the first (n/VF) iterations.  The second copy of the loop
6010      will remain scalar and will compute the remaining (n%VF) iterations.
6011      (VF is the vectorization factor).  */
6012
6013   if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
6014       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
6015     {
6016       tree ratio_mult_vf;
6017       if (!ni_name)
6018         ni_name = vect_build_loop_niters (loop_vinfo);
6019       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
6020                                        &ratio);
6021       vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
6022                                       th, check_profitability);
6023     }
6024   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6025     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
6026                 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
6027   else
6028     {
6029       if (!ni_name)
6030         ni_name = vect_build_loop_niters (loop_vinfo);
6031       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
6032     }
6033
6034   /* 1) Make sure the loop header has exactly two entries
6035      2) Make sure we have a preheader basic block.  */
6036
6037   gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
6038
6039   split_edge (loop_preheader_edge (loop));
6040
6041   /* FORNOW: the vectorizer supports only loops which body consist
6042      of one basic block (header + empty latch). When the vectorizer will
6043      support more involved loop forms, the order by which the BBs are
6044      traversed need to be reconsidered.  */
6045
6046   for (i = 0; i < nbbs; i++)
6047     {
6048       basic_block bb = bbs[i];
6049       stmt_vec_info stmt_info;
6050
6051       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
6052            gsi_next (&si))
6053         {
6054           gphi *phi = si.phi ();
6055           if (dump_enabled_p ())
6056             {
6057               dump_printf_loc (MSG_NOTE, vect_location,
6058                                "------>vectorizing phi: ");
6059               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
6060               dump_printf (MSG_NOTE, "\n");
6061             }
6062           stmt_info = vinfo_for_stmt (phi);
6063           if (!stmt_info)
6064             continue;
6065
6066           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6067             vect_loop_kill_debug_uses (loop, phi);
6068
6069           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6070               && !STMT_VINFO_LIVE_P (stmt_info))
6071             continue;
6072
6073           if (STMT_VINFO_VECTYPE (stmt_info)
6074               && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6075                   != (unsigned HOST_WIDE_INT) vectorization_factor)
6076               && dump_enabled_p ())
6077             dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6078
6079           if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
6080             {
6081               if (dump_enabled_p ())
6082                 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
6083               vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
6084             }
6085         }
6086
6087       pattern_stmt = NULL;
6088       for (gimple_stmt_iterator si = gsi_start_bb (bb);
6089            !gsi_end_p (si) || transform_pattern_stmt;)
6090         {
6091           bool is_store;
6092
6093           if (transform_pattern_stmt)
6094             stmt = pattern_stmt;
6095           else
6096             {
6097               stmt = gsi_stmt (si);
6098               /* During vectorization remove existing clobber stmts.  */
6099               if (gimple_clobber_p (stmt))
6100                 {
6101                   unlink_stmt_vdef (stmt);
6102                   gsi_remove (&si, true);
6103                   release_defs (stmt);
6104                   continue;
6105                 }
6106             }
6107
6108           if (dump_enabled_p ())
6109             {
6110               dump_printf_loc (MSG_NOTE, vect_location,
6111                                "------>vectorizing statement: ");
6112               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6113               dump_printf (MSG_NOTE, "\n");
6114             }
6115
6116           stmt_info = vinfo_for_stmt (stmt);
6117
6118           /* vector stmts created in the outer-loop during vectorization of
6119              stmts in an inner-loop may not have a stmt_info, and do not
6120              need to be vectorized.  */
6121           if (!stmt_info)
6122             {
6123               gsi_next (&si);
6124               continue;
6125             }
6126
6127           if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6128             vect_loop_kill_debug_uses (loop, stmt);
6129
6130           if (!STMT_VINFO_RELEVANT_P (stmt_info)
6131               && !STMT_VINFO_LIVE_P (stmt_info))
6132             {
6133               if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6134                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6135                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6136                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6137                 {
6138                   stmt = pattern_stmt;
6139                   stmt_info = vinfo_for_stmt (stmt);
6140                 }
6141               else
6142                 {
6143                   gsi_next (&si);
6144                   continue;
6145                 }
6146             }
6147           else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6148                    && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6149                    && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6150                        || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6151             transform_pattern_stmt = true;
6152
6153           /* If pattern statement has def stmts, vectorize them too.  */
6154           if (is_pattern_stmt_p (stmt_info))
6155             {
6156               if (pattern_def_seq == NULL)
6157                 {
6158                   pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6159                   pattern_def_si = gsi_start (pattern_def_seq);
6160                 }
6161               else if (!gsi_end_p (pattern_def_si))
6162                 gsi_next (&pattern_def_si);
6163               if (pattern_def_seq != NULL)
6164                 {
6165                   gimple pattern_def_stmt = NULL;
6166                   stmt_vec_info pattern_def_stmt_info = NULL;
6167
6168                   while (!gsi_end_p (pattern_def_si))
6169                     {
6170                       pattern_def_stmt = gsi_stmt (pattern_def_si);
6171                       pattern_def_stmt_info
6172                         = vinfo_for_stmt (pattern_def_stmt);
6173                       if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6174                           || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6175                         break;
6176                       gsi_next (&pattern_def_si);
6177                     }
6178
6179                   if (!gsi_end_p (pattern_def_si))
6180                     {
6181                       if (dump_enabled_p ())
6182                         {
6183                           dump_printf_loc (MSG_NOTE, vect_location,
6184                                            "==> vectorizing pattern def "
6185                                            "stmt: ");
6186                           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6187                                             pattern_def_stmt, 0);
6188                           dump_printf (MSG_NOTE, "\n");
6189                         }
6190
6191                       stmt = pattern_def_stmt;
6192                       stmt_info = pattern_def_stmt_info;
6193                     }
6194                   else
6195                     {
6196                       pattern_def_si = gsi_none ();
6197                       transform_pattern_stmt = false;
6198                     }
6199                 }
6200               else
6201                 transform_pattern_stmt = false;
6202             }
6203
6204           if (STMT_VINFO_VECTYPE (stmt_info))
6205             {
6206               unsigned int nunits
6207                 = (unsigned int)
6208                   TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6209               if (!STMT_SLP_TYPE (stmt_info)
6210                   && nunits != (unsigned int) vectorization_factor
6211                   && dump_enabled_p ())
6212                   /* For SLP VF is set according to unrolling factor, and not
6213                      to vector size, hence for SLP this print is not valid.  */
6214                 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6215             }
6216
6217           /* SLP. Schedule all the SLP instances when the first SLP stmt is
6218              reached.  */
6219           if (STMT_SLP_TYPE (stmt_info))
6220             {
6221               if (!slp_scheduled)
6222                 {
6223                   slp_scheduled = true;
6224
6225                   if (dump_enabled_p ())
6226                     dump_printf_loc (MSG_NOTE, vect_location,
6227                                      "=== scheduling SLP instances ===\n");
6228
6229                   vect_schedule_slp (loop_vinfo, NULL);
6230                 }
6231
6232               /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
6233               if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
6234                 {
6235                   if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6236                     {
6237                       pattern_def_seq = NULL;
6238                       gsi_next (&si);
6239                     }
6240                   continue;
6241                 }
6242             }
6243
6244           /* -------- vectorize statement ------------ */
6245           if (dump_enabled_p ())
6246             dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
6247
6248           grouped_store = false;
6249           is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
6250           if (is_store)
6251             {
6252               if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6253                 {
6254                   /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6255                      interleaving chain was completed - free all the stores in
6256                      the chain.  */
6257                   gsi_next (&si);
6258                   vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
6259                 }
6260               else
6261                 {
6262                   /* Free the attached stmt_vec_info and remove the stmt.  */
6263                   gimple store = gsi_stmt (si);
6264                   free_stmt_vec_info (store);
6265                   unlink_stmt_vdef (store);
6266                   gsi_remove (&si, true);
6267                   release_defs (store);
6268                 }
6269
6270               /* Stores can only appear at the end of pattern statements.  */
6271               gcc_assert (!transform_pattern_stmt);
6272               pattern_def_seq = NULL;
6273             }
6274           else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6275             {
6276               pattern_def_seq = NULL;
6277               gsi_next (&si);
6278             }
6279         }                       /* stmts in BB */
6280     }                           /* BBs in loop */
6281
6282   slpeel_make_loop_iterate_ntimes (loop, ratio);
6283
6284   /* Reduce loop iterations by the vectorization factor.  */
6285   scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
6286                       expected_iterations / vectorization_factor);
6287   loop->nb_iterations_upper_bound
6288     = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
6289   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6290       && loop->nb_iterations_upper_bound != 0)
6291     loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
6292   if (loop->any_estimate)
6293     {
6294       loop->nb_iterations_estimate
6295         = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
6296        if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
6297            && loop->nb_iterations_estimate != 0)
6298          loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
6299     }
6300
6301   if (dump_enabled_p ())
6302     {
6303       dump_printf_loc (MSG_NOTE, vect_location,
6304                        "LOOP VECTORIZED\n");
6305       if (loop->inner)
6306         dump_printf_loc (MSG_NOTE, vect_location,
6307                          "OUTER LOOP VECTORIZED\n");
6308       dump_printf (MSG_NOTE, "\n");
6309     }
6310 }